<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:25:25 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2464] recovery can&apos;t be finished forever</title>
                <link>https://jira.whamcloud.com/browse/LU-2464</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We have a &quot;recovery never finishes&quot; condition at the customer site. Even a couple of hours after the MDT started, it was still RECOVERING in recovery_status. We tried umount and remount, but the situation stayed the same and new client connections were denied. Finally, we added &quot;-o abort_recovery&quot; to the mount options to fix this problem. So, why can&apos;t the recovery finish in a reasonable time?&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat /proc/fs/lustre/mds/*/recovery_status
status: RECOVERING
recovery_start: 0
time_remaining: 0
connected_clients: 0/2
delayed_clients: 0/2
completed_clients: 0/2
replayed_requests: 0/??
queued_requests: 0
next_transno: 38672353440
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Lustre-1.8.8, Infiniband</environment>
        <key id="16894">LU-2464</key>
            <summary>recovery can&apos;t be finished forever</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="ihara">Shuichi Ihara</reporter>
                        <labels>
                    </labels>
                <created>Tue, 11 Dec 2012 10:36:40 +0000</created>
                <updated>Mon, 8 Apr 2013 16:48:25 +0000</updated>
                            <resolved>Mon, 8 Apr 2013 16:48:25 +0000</resolved>
                                    <version>Lustre 1.8.x (1.8.0 - 1.8.5)</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="49045" author="johann" created="Tue, 11 Dec 2012 10:58:51 +0000"  >&lt;p&gt;Ihara, the recovery timer only starts once the first client reconnects. Since none of the clients have reconnected yet, recovery is still in progress.&lt;/p&gt;</comment>
                            <comment id="49047" author="ihara" created="Tue, 11 Dec 2012 11:10:10 +0000"  >&lt;p&gt;Which clients was the MDS waiting for during recovery?&lt;/p&gt;</comment>
                            <comment id="49050" author="johann" created="Tue, 11 Dec 2012 11:54:39 +0000"  >&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;connected_clients: 0/2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It is waiting for 2 clients, but I can&apos;t tell you which ones (we only store the UUID in the last_rcvd file).&lt;/p&gt;</comment>
                            <comment id="49054" author="ihara" created="Tue, 11 Dec 2012 12:21:01 +0000"  >&lt;p&gt;So, if we can&apos;t find these two clients or can&apos;t connect clients for some reasons (e.g. h/w or s/w problems), but don&apos;t want to finish recovery, is abort_recovery the only way?&lt;/p&gt;</comment>
                            <comment id="49093" author="adilger" created="Tue, 11 Dec 2012 23:09:53 +0000"  >&lt;p&gt;Ihara, to avoid the problem where the server is disconnected from the network, the recovery timer does not start until &lt;em&gt;any&lt;/em&gt; client tries to connect to the server. If you know that no clients will connect then abort_recovery will speed this up. Otherwise, recovery will start when the first client tries to mount the filesystem. &lt;/p&gt;</comment>
                            <comment id="49124" author="pjones" created="Wed, 12 Dec 2012 09:17:05 +0000"  >&lt;p&gt;Assigning to Bruno for any follow on questions&lt;/p&gt;</comment>
                            <comment id="49596" author="bfaccini" created="Sat, 22 Dec 2012 05:08:57 +0000"  >&lt;p&gt;Ihara,&lt;br/&gt;
Is there anything more we can do on this ticket ?? If not, can we close it ??&lt;/p&gt;</comment>
                            <comment id="49597" author="ihara" created="Sat, 22 Dec 2012 05:42:33 +0000"  >&lt;p&gt;Bruno, &lt;/p&gt;

&lt;p&gt;We had the same problem at the same customer twice after you reviewed it, but we couldn&apos;t get a crash dump due to a hardware configuration problem. It should work now, and once we hit the same problem again, we should be able to give you one for deeper analysis. &lt;/p&gt;

&lt;p&gt;Please keep this open and we will update you.&lt;/p&gt;</comment>
                            <comment id="51205" author="bfaccini" created="Fri, 25 Jan 2013 09:56:14 +0000"  >&lt;p&gt;Ihara, No news ?&lt;/p&gt;</comment>
                            <comment id="51206" author="ihara" created="Fri, 25 Jan 2013 10:03:26 +0000"  >&lt;p&gt;Bruno, the last problem was fixed by abort_recovery and we don&apos;t need additional investigation on this. Please close this ticket, and I will open a new ticket if we see the same problem at this customer.&lt;br/&gt;
Thanks! &lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="12088" name="LFS01-MDS-ALPL105-msg.log" size="155758" author="ihara" created="Tue, 11 Dec 2012 10:36:40 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvdlj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5809</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>