<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:27:28 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
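An illustrative full request (assuming the standard JIRA issue-xml view path for this issue; the exact base URL may differ):
https://jira.whamcloud.com/si/jira.issueviews:issue-xml/LU-2701/LU-2701.xml?field=key&field=summary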
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2701] recovery-small test 27 umount hang </title>
                <link>https://jira.whamcloud.com/browse/LU-2701</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Some recent landing introduced a problem in OSP cleanup.&lt;br/&gt;
Specifically, test 27 of recovery-small seems to be affected.&lt;br/&gt;
This test deliberately breaks OSC communication, and perhaps OSP is not able to recover?&lt;br/&gt;
Trace of the hung umount:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;PID: 28642  TASK: ffff880097b0a140  CPU: 6   COMMAND: &quot;umount&quot;
 #0 [ffff88007b6c1898] schedule at ffffffff814f7c98
 #1 [ffff88007b6c1960] osp_sync_fini at ffffffffa069d09d [osp]
 #2 [ffff88007b6c19c0] osp_process_config at ffffffffa06972c0 [osp]
 #3 [ffff88007b6c1a20] lod_cleanup_desc_tgts at ffffffffa05ed564 [lod]
 #4 [ffff88007b6c1a70] lod_process_config at ffffffffa05f0266 [lod]
 #5 [ffff88007b6c1af0] mdd_process_config at ffffffffa0427c4b [mdd]
 #6 [ffff88007b6c1b50] mdt_stack_fini at ffffffffa0726b21 [mdt]
 #7 [ffff88007b6c1bb0] mdt_device_fini at ffffffffa072799a [mdt]
 #8 [ffff88007b6c1bf0] class_cleanup at ffffffffa0fb5247 [obdclass]
 #9 [ffff88007b6c1c70] class_process_config at ffffffffa0fb6b2c [obdclass]
#10 [ffff88007b6c1d00] class_manual_cleanup at ffffffffa0fb7869 [obdclass]
#11 [ffff88007b6c1dc0] server_put_super at ffffffffa0fc83bc [obdclass]
#12 [ffff88007b6c1e30] generic_shutdown_super at ffffffff8117d6ab
#13 [ffff88007b6c1e50] kill_anon_super at ffffffff8117d796
#14 [ffff88007b6c1e70] lustre_kill_super at ffffffffa0fb9666 [obdclass]
#15 [ffff88007b6c1e90] deactivate_super at ffffffff8117e825
#16 [ffff88007b6c1eb0] mntput_no_expire at ffffffff8119a89f
#17 [ffff88007b6c1ee0] sys_umount at ffffffff8119b34b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;After this, nothing could progress:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[145371.090429] LustreError: 22398:0:(fail.c:133:__cfs_fail_timeout_set()) cfs_f
ail_timeout id 407 sleeping for 10000ms
[145380.552626] Lustre: lustre-MDT0000: Will be in recovery for at least 1:00, o
r until 1 client reconnects
[145380.573743] Lustre: lustre-MDT0000: Recovery over after 0:01, of 1 clients 1
 recovered and 0 were evicted.
[145380.588321] Lustre: lustre-OST0001: deleting orphan objects from 0x0:176 to 
192
[145380.588747] Lustre: Skipped 1 previous similar message
[145381.093065] LustreError: 22398:0:(fail.c:137:__cfs_fail_timeout_set()) cfs_f
ail_timeout id 407 awake
[145459.804339] Lustre: Failing over lustre-MDT0000
[145459.809747] LustreError: 11-0: lustre-MDT0000-mdc-ffff88008c61dbf0: Communic
ating with 0@lo, operation mds_reint failed with -19.
[145459.810324] LustreError: Skipped 5 previous similar messages
[145460.115626] LustreError: 20940:0:(client.c:1039:ptlrpc_import_delay_req()) @
@@ IMP_CLOSED   req@ffff88008fd2ebf0 x1425344870597376/t0(0) o6-&amp;gt;lustre-OST0000-
osc-MDT0000@0@lo:28/4 lens 664/432 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
[145460.157395] LustreError: 20938:0:(client.c:1039:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff880073692bf0 x1425344870597409/t0(0) o6-&amp;gt;lustre-OST0001-osc-MDT0000@0@lo:28/4 lens 664/432 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
[145460.157395] LustreError: 20938:0:(client.c:1039:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff880073692bf0 x1425344870597409/t0(0) o6-&amp;gt;lustre-OST0001-osc-MDT0000@0@lo:28/4 lens 664/432 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
[145460.158377] LustreError: 20938:0:(client.c:1039:ptlrpc_import_delay_req()) Skipped 7 previous similar messages
[145460.608530] LustreError: 137-5: lustre-MDT0000: Not available for connect from 0@lo (stopping)
[145465.605136] LustreError: 137-5: lustre-MDT0000: Not available for connect from 0@lo (stopping)
[145465.606017] LustreError: Skipped 3 previous similar messages
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;I have a crashdump.&lt;/p&gt;</description>
                <environment></environment>
        <key id="17339">LU-2701</key>
            <summary>recovery-small test 27 umount hang </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                            <label>MB</label>
                    </labels>
                <created>Tue, 29 Jan 2013 01:16:18 +0000</created>
                <updated>Fri, 15 Mar 2013 08:53:12 +0000</updated>
                            <resolved>Fri, 15 Mar 2013 01:27:57 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                    <comments>
                            <comment id="51375" author="green" created="Tue, 29 Jan 2013 01:21:48 +0000"  >&lt;p&gt;This problem seems to be introduced by commit 74ec68346e14851ad8a1912185e1dccd3e6d12cd falso from Wangdi:     &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1187&quot; title=&quot;DNE Phase 1: Remote Directories&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1187&quot;&gt;&lt;del&gt;LU-1187&lt;/del&gt;&lt;/a&gt; lod: Fix config log and setup process for DNE&lt;/p&gt;</comment>
                            <comment id="51377" author="jlevi" created="Tue, 29 Jan 2013 01:33:27 +0000"  >&lt;p&gt;Di is looking into this one as well.&lt;/p&gt;</comment>
                            <comment id="51378" author="green" created="Tue, 29 Jan 2013 01:38:34 +0000"  >&lt;p&gt;Hm, actually looking through earlier logs, I have seem a very similar stack before for umount, but not triggering in recovery-small test 27 so reliably.&lt;/p&gt;</comment>
                            <comment id="51390" author="green" created="Tue, 29 Jan 2013 09:40:08 +0000"  >&lt;p&gt;First appearance I see was on Jan 19, so still a recent thing.&lt;/p&gt;</comment>
                            <comment id="51427" author="di.wang" created="Tue, 29 Jan 2013 19:20:16 +0000"  >&lt;p&gt;Hmm, if the first appearance is on Jan 19th, most changes on this area has not been landed yet. I think most of the patches here were landed on Jan 22th.  And also it seems cleanup process is waiting for unlink log threads to be stopped, so it is unlikely fid on ost problem, IMHO.  I will look at it deeper, sigh, can not reproduce it locally.&lt;/p&gt;</comment>
                            <comment id="51974" author="adilger" created="Thu, 7 Feb 2013 10:41:57 +0000"  >&lt;p&gt;Problem appears to be in LOD/OSP code, not DNE.&lt;/p&gt;</comment>
                            <comment id="52670" author="bzzz" created="Tue, 19 Feb 2013 03:33:00 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/5463&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5463&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="54079" author="bzzz" created="Fri, 15 Mar 2013 01:27:49 +0000"  >&lt;p&gt;the patch is landed. the issue is hopefully solved.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvgbz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6294</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>