<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:40:00 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10993] Fix for LU-10826 is problematic and skips recvoery</title>
                <link>https://jira.whamcloud.com/browse/LU-10993</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;I think patch &lt;a href=&quot;https://review.whamcloud.com/#/c/31690/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/31690/&lt;/a&gt; for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10826&quot; title=&quot;Regression in LU-9372 on OPA enviroment and no recovery triggered&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10826&quot;&gt;&lt;del&gt;LU-10826&lt;/del&gt;&lt;/a&gt; is more problematic.&lt;br/&gt;
After applying patch &lt;a href=&quot;https://review.whamcloud.com/#/c/31690/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/31690/&lt;/a&gt; and setting test_req_buffer_pressure=1, it prevents OOM, but some recovery clients are being skipped. &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@voss05 ~]#  lctl get_param obdfilter.*.recovery_status
obdfilter.scratch-OST0024.recovery_status=
status: COMPLETE
recovery_start: 1525317355
recovery_duration: 54
completed_clients: 7249/7249
replayed_requests: 0
last_transno: 98784247808
VBR: DISABLED
IR: ENABLED
obdfilter.scratch-OST0025.recovery_status=
status: COMPLETE
recovery_start: 1525317353
recovery_duration: 56
completed_clients: 7031/7031
replayed_requests: 0
last_transno: 98784247808
VBR: DISABLED
IR: ENABLED
obdfilter.scratch-OST0026.recovery_status=
status: COMPLETE
recovery_start: 1525317352
recovery_duration: 57
completed_clients: 8168/8168
replayed_requests: 0
last_transno: 98784247808
VBR: DISABLED
IR: ENABLED
obdfilter.scratch-OST0027.recovery_status=
status: COMPLETE
recovery_start: 1525317350
recovery_duration: 59
completed_clients: 8195/8195
replayed_requests: 0
last_transno: 98784247808
VBR: DISABLED
IR: ENABLED
obdfilter.scratch-OST0028.recovery_status=
status: COMPLETE
recovery_start: 1525317355
recovery_duration: 54
completed_clients: 7984/7984
replayed_requests: 0
last_transno: 98784247808
VBR: DISABLED
IR: ENABLED
obdfilter.scratch-OST0029.recovery_status=
status: COMPLETE
recovery_start: 1525317352
recovery_duration: 57
completed_clients: 7985/7985
replayed_requests: 0
last_transno: 98784247808
VBR: DISABLED
IR: ENABLED
obdfilter.scratch-OST002a.recovery_status=
status: COMPLETE
recovery_start: 1525317354
recovery_duration: 55
completed_clients: 8329/8329
replayed_requests: 0
last_transno: 98784247808
VBR: DISABLED
IR: ENABLED
obdfilter.scratch-OST002b.recovery_status=
status: COMPLETE
recovery_start: 1525317351
recovery_duration: 58
completed_clients: 8291/8291
replayed_requests: 0
last_transno: 98784247808
VBR: DISABLED
IR: ENABLED
obdfilter.scratch-OST002c.recovery_status=
status: COMPLETE
recovery_start: 1525317350
recovery_duration: 59
completed_clients: 8286/8286
replayed_requests: 0
last_transno: 94489280512
VBR: DISABLED
IR: ENABLED
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Also, sometimes recovery is still never triggered, e.g. in a failover situation. &lt;br/&gt;
I see the following messages after restarting the OSTs:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 9169.158440] Lustre: 14598:0:(events.c:368:request_in_callback()) All ost request buffers busy
[ 9169.158447] Lustre: 14598:0:(events.c:368:request_in_callback()) Skipped 3508 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="52099">LU-10993</key>
            <summary>Fix for LU-10826 is problematic and skips recvoery</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="tappro">Mikhail Pershin</assignee>
                                    <reporter username="ihara">Shuichi Ihara</reporter>
                        <labels>
                    </labels>
                <created>Thu, 3 May 2018 03:47:26 +0000</created>
                <updated>Sun, 16 Jan 2022 08:34:13 +0000</updated>
                            <resolved>Sun, 16 Jan 2022 08:34:13 +0000</resolved>
                                    <version>Lustre 2.12.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="227173" author="bfaccini" created="Thu, 3 May 2018 08:33:51 +0000"  >&lt;p&gt;Hello Shuichi, &lt;br/&gt;
Why do you think that some recovery clients are being missed ? Because you expect the number of completed_clients to be the same for each OST ?&lt;/p&gt;

&lt;p&gt;Also, the &quot;(events.c:368:request_in_callback()) All ost request buffers busy&quot; message is expected to occur when running with test_req_buffer_pressure=1.&lt;/p&gt;</comment>
                            <comment id="227174" author="ihara" created="Thu, 3 May 2018 08:55:51 +0000"  >&lt;p&gt;Yes, and I&apos;ve checked the client side, but they didn&apos;t connect to the OST even though the recovery status is completed.&lt;br/&gt;
Another problem: there are 40 x OSS here and some of the OSSs triggered recovery, but many OSSs still didn&apos;t trigger recovery.&lt;br/&gt;
Actually, if we unmount the OSTs and remount them again on those OSSs, recovery is retriggered, but not all clients recover.&lt;br/&gt;
The patch &lt;a href=&quot;https://review.whamcloud.com/#/c/31690/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/31690/&lt;/a&gt; might be incomplete.&lt;/p&gt;</comment>
                            <comment id="227300" author="bfaccini" created="Fri, 4 May 2018 07:24:24 +0000"  >&lt;p&gt;I know it is not a simple task, but as you seem to be able to reproduce easily, can you try to reduce the test to a minimal sub-set of OSS&apos;s OSTs and connected Clients and then take a full Lustre debug log on OSS and Clients ? I would like to get at least the trace from OSS and from both a successful and failed Clients.&lt;br/&gt;
In the mean time I will try to reproduce on a test platform.&lt;/p&gt;</comment>
                            <comment id="227303" author="ihara" created="Fri, 4 May 2018 08:18:40 +0000"  >&lt;p&gt;ok, let me know what exact information do you need.&lt;br/&gt;
At least, the situation where recovery is not re-triggered could be reproduced easily.&lt;/p&gt;</comment>
                            <comment id="227317" author="bfaccini" created="Fri, 4 May 2018 13:03:03 +0000"  >&lt;p&gt;&amp;gt; ok, let me know what exact information do you need.&lt;br/&gt;
Well, like what I have already indicated in my previous comment! : &quot;can you try to reduce the test to a minimal sub-set of OSS&apos;s OSTs and connected Clients and then take a full Lustre debug log on OSS and Clients ? I would like to get at least the trace from OSS and from both a successful and failed Clients.&quot;&lt;/p&gt;

</comment>
                            <comment id="228960" author="bfaccini" created="Fri, 1 Jun 2018 14:04:14 +0000"  >&lt;p&gt;Hello Shuichi,&lt;br/&gt;
Just a small update to let you know that the attempts to reproduce this problem have all been unsuccessful until now.&lt;br/&gt;
BTW, did you find some time to reproduce it again on your side in order to provide the information I requested before?&lt;/p&gt;</comment>
                            <comment id="232524" author="pjones" created="Thu, 23 Aug 2018 17:25:09 +0000"  >&lt;p&gt;Mike&lt;/p&gt;

&lt;p&gt;Could you please assess this situation?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="234856" author="pjones" created="Fri, 12 Oct 2018 19:20:29 +0000"  >&lt;p&gt;Descoping from 2.12 for now as there is not enough to work on. We can certainly continue to work this as soon as there is some more data available&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="51413">LU-10826</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzwqn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10021"><![CDATA[2]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>