<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:48:00 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5039] MDS mount hangs on orphan recovery</title>
                <link>https://jira.whamcloud.com/browse/LU-5039</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Running Lustre 2.4.0-28chaos (see github.com/chaos/lustre), we find that sometimes after a reboot the MDS can get stuck during mount cleaning up the orphan files in the PENDING directory.  Some times we have 100,000+ files to process, and this can take literally hours.  The symptoms are pretty similar to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5038&quot; title=&quot;Mount hangs for hours processing some llog&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5038&quot;&gt;&lt;del&gt;LU-5038&lt;/del&gt;&lt;/a&gt;, but I believe that the cause is different.&lt;/p&gt;

&lt;p&gt;Here is a backtrace of the offending thread:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2014-03-06 22:34:12 Process tgt_recov (pid: 15478, threadinfo ffff8807bc436000, task ffff88081a6e2080)
2014-03-06 22:34:12 Stack:
2014-03-06 22:34:12  ffff88072e3df000 0000000000000000 0000000000003f14 ffff88072e3df060
2014-03-06 22:34:12 &amp;lt;d&amp;gt; ffff8807bc437a40 ffffffffa0341396 ffff8807bc437a20 ffff88072e3df038
2014-03-06 22:34:12 &amp;lt;d&amp;gt; 0000000000000014 ffff8807f9fbf530 0000000000000000 0000000000003f14
2014-03-06 22:34:12 Call Trace:
2014-03-06 22:34:12  [&amp;lt;ffffffffa0341396&amp;gt;] __dbuf_hold_impl+0x66/0x480 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa034182f&amp;gt;] dbuf_hold_impl+0x7f/0xb0 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa03428e0&amp;gt;] dbuf_hold+0x20/0x30 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa03486e7&amp;gt;] dmu_buf_hold+0x97/0x1d0 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa03369a0&amp;gt;] ? remove_reference+0xa0/0xc0 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa039e76b&amp;gt;] zap_idx_to_blk+0xab/0x140 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa039ff61&amp;gt;] zap_deref_leaf+0x51/0x80 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa039f956&amp;gt;] ? zap_put_leaf+0x86/0xe0 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa03a03dc&amp;gt;] fzap_cursor_retrieve+0xfc/0x2a0 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa03a593b&amp;gt;] zap_cursor_retrieve+0x17b/0x2f0 [zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa0d1739c&amp;gt;] ? udmu_zap_cursor_init_serialized+0x2c/0x30 [osd_zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa0d29058&amp;gt;] osd_index_retrieve_skip_dots+0x28/0x60 [osd_zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa0d29638&amp;gt;] osd_dir_it_next+0x98/0x120 [osd_zfs]
2014-03-06 22:34:12  [&amp;lt;ffffffffa0f08161&amp;gt;] lod_it_next+0x21/0x90 [lod]
2014-03-06 22:34:12  [&amp;lt;ffffffffa0dd1989&amp;gt;] __mdd_orphan_cleanup+0xa9/0xca0 [mdd]
2014-03-06 22:34:12  [&amp;lt;ffffffffa0de134d&amp;gt;] mdd_recovery_complete+0xed/0x170 [mdd]
2014-03-06 22:34:12  [&amp;lt;ffffffffa0e34cb5&amp;gt;] mdt_postrecov+0x35/0xd0 [mdt]
2014-03-06 22:34:12  [&amp;lt;ffffffffa0e36178&amp;gt;] mdt_obd_postrecov+0x78/0x90 [mdt]
2014-03-06 22:34:12  [&amp;lt;ffffffffa08745c0&amp;gt;] ? ldlm_reprocess_res+0x0/0x20 [ptlrpc]
2014-03-06 22:34:12  [&amp;lt;ffffffffa086f8ae&amp;gt;] ? ldlm_reprocess_all_ns+0x3e/0x110 [ptlrpc]
2014-03-06 22:34:12  [&amp;lt;ffffffffa0885004&amp;gt;] target_recovery_thread+0xc64/0x1980 [ptlrpc]
2014-03-06 22:34:12  [&amp;lt;ffffffffa08843a0&amp;gt;] ? target_recovery_thread+0x0/0x1980 [ptlrpc]
2014-03-06 22:34:12  [&amp;lt;ffffffff8100c10a&amp;gt;] child_rip+0xa/0x20
2014-03-06 22:34:12  [&amp;lt;ffffffffa08843a0&amp;gt;] ? target_recovery_thread+0x0/0x1980 [ptlrpc]
2014-03-06 22:34:12  [&amp;lt;ffffffffa08843a0&amp;gt;] ? target_recovery_thread+0x0/0x1980 [ptlrpc]
2014-03-06 22:34:12  [&amp;lt;ffffffff8100c100&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The mount process is blocked while this is going on.  The cleanup is completely sequential and on ZFS very slow, on the order of 10 per second.&lt;/p&gt;

&lt;p&gt;The orphan cleanup task really needs to be backgrounded (and perhaps parallelized) rather than blocking the MDT mount processes.&lt;/p&gt;</description>
                <environment></environment>
        <key id="24639">LU-5039</key>
            <summary>MDS mount hangs on orphan recovery</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="niu">Niu Yawei</assignee>
                                    <reporter username="morrone">Christopher Morrone</reporter>
                        <labels>
                            <label>mn4</label>
                    </labels>
                <created>Fri, 9 May 2014 21:11:08 +0000</created>
                <updated>Tue, 12 Aug 2014 19:43:52 +0000</updated>
                            <resolved>Mon, 23 Jun 2014 21:42:35 +0000</resolved>
                                                    <fixVersion>Lustre 2.6.0</fixVersion>
                    <fixVersion>Lustre 2.5.3</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="83707" author="pjones" created="Sat, 10 May 2014 15:44:41 +0000"  >&lt;p&gt;Alex&lt;/p&gt;

&lt;p&gt;I think that it is best that you comment on this one&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="83805" author="bzzz" created="Mon, 12 May 2014 06:04:31 +0000"  >&lt;p&gt;the idea is correct and fine. though I&apos;m very confused by 10/second - we should be able to do much faster, given no LDLM contention, etc.&lt;/p&gt;</comment>
                            <comment id="84824" author="pjones" created="Fri, 23 May 2014 23:09:47 +0000"  >&lt;p&gt;Niu &lt;/p&gt;

&lt;p&gt;Could you please create a patch based on Oleg&apos;s suggestion (to follow)&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="84825" author="green" created="Fri, 23 May 2014 23:12:06 +0000"  >&lt;p&gt;While these slow deletions are extreme, at least we can speed up the startup by doing deletions from a separate thread once recovery is complete.&lt;br/&gt;
Basically we&apos;ll create a new PENDING dir and will move all entries claimed by recovery from old pending there. Then we just spawn another thread to delete the old pending and its content.&lt;/p&gt;</comment>

&lt;p&gt;Need to be careful about MDS failure while doing this split handling and another recovery-restart - we probably would need to move all entries to old pending and redo the process. There should not be many due to recovery I hope&lt;/p&gt;</comment>
                            <comment id="84852" author="niu" created="Mon, 26 May 2014 02:57:56 +0000"  >&lt;blockquote&gt;
&lt;p&gt;Basically we&apos;ll create a new PENDING dir and will move all entries claimed by recovery from old pending there. Then we just spawn another thread to delete the old pending and its content.&lt;br/&gt;
Need to be careful about MDS failure while doing this split handling and another recovery-restart - we probably would need to move all entries to old pending and redo the process. There should not be many due to recovery I hope&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;What bad will happen if we just start a thread to delete orphans from the original PENDING?&lt;/p&gt;</comment>
                            <comment id="85522" author="green" created="Mon, 2 Jun 2014 21:55:41 +0000"  >&lt;p&gt;the possible problem is that list of items in PENDING is not fixed, new files might be added.&lt;br/&gt;
Can we reliably and race-free tell the ones that are still needed from those that are stale and need to be killed? &lt;br/&gt;
Also things like NFS further complicate things by possibly briefly reattaching to deleted files that ought to be deleted.&lt;/p&gt;</comment>
                            <comment id="85551" author="bzzz" created="Tue, 3 Jun 2014 09:43:29 +0000"  >&lt;p&gt;there is open count in mdd object which tells whether the file is in use. probably we&apos;ll have to add locking to protect the last close vs. the cleanup procedure..&lt;/p&gt;</comment>
                            <comment id="85553" author="niu" created="Tue, 3 Jun 2014 09:57:11 +0000"  >&lt;blockquote&gt;
&lt;p&gt;there is open count in mdd object which tells whether the file is in use. probably we&apos;ll have to add locking to protect the last close vs. the cleanup procedure..&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Indeed, I&apos;ve talked with Oleg about this, and looks we already have lock serialized last close and orphan cleanup. I&apos;ll compose patch soon. Thank you all.&lt;/p&gt;</comment>
                            <comment id="85686" author="niu" created="Wed, 4 Jun 2014 11:39:14 +0000"  >&lt;p&gt;cleanup orphan asynchronously: &lt;a href=&quot;http://review.whamcloud.com/10584&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10584&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="87143" author="niu" created="Fri, 20 Jun 2014 07:15:30 +0000"  >&lt;p&gt;patch landed on master, need we backport it b2_4 &amp;amp; b2_5?&lt;/p&gt;</comment>
                            <comment id="87151" author="pjones" created="Fri, 20 Jun 2014 13:11:11 +0000"  >&lt;p&gt;Yes I think that we should&lt;/p&gt;</comment>
                            <comment id="87256" author="niu" created="Mon, 23 Jun 2014 02:43:29 +0000"  >&lt;p&gt;b2_4: &lt;a href=&quot;http://review.whamcloud.com/10779&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10779&lt;/a&gt;&lt;br/&gt;
b2_5: &lt;a href=&quot;http://review.whamcloud.com/10780&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10780&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="87322" author="pjones" created="Mon, 23 Jun 2014 21:42:35 +0000"  >&lt;p&gt;Landed for 2.6&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwm87:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>13931</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>