<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:18:11 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8509] drop_caches hangs in cl_inode_fini()</title>
                <link>https://jira.whamcloud.com/browse/LU-8509</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Running lustre 2.8.0_0.0.llnlpreview.18 on the clients (see the lustre-release-fe-llnl), we are regularly seeing hangs of the /etc/slurm/prolog script when it triggers drop_caches.  This script runs before each job to clear out the cache from any previous jobs.&lt;/p&gt;

&lt;p&gt;In particular it hangs here:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;#  Flush slab cache entries
echo 2 &amp;gt;/proc/sys/vm/drop_caches
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And this is the backtrace for where it is getting stuck:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; bt -xs 1386
PID: 1386   TASK: ffff88201b0a5080  CPU: 10  COMMAND: &quot;prolog&quot;
 #0 [ffff882011bd3af8] __schedule+0x295 at ffffffff81651975
 #1 [ffff882011bd3b60] schedule+0x29 at ffffffff81652049
 #2 [ffff882011bd3b70] cl_inode_fini+0x1ac at ffffffffa0c6b3ac [lustre]
 #3 [ffff882011bd3c10] ll_clear_inode+0x21c at ffffffffa0c377ec [lustre]
 #4 [ffff882011bd3c38] ll_delete_inode+0x58 at ffffffffa0c39048 [lustre]
 #5 [ffff882011bd3c60] evict+0xa7 at ffffffff81204077
 #6 [ffff882011bd3c88] dispose_list+0x3e at ffffffff8120417e
 #7 [ffff882011bd3cb0] prune_icache_sb+0x163 at ffffffff81205113
 #8 [ffff882011bd3d18] prune_super+0x143 at ffffffff811ea343
 #9 [ffff882011bd3d50] shrink_slab+0x175 at ffffffff81183a25
#10 [ffff882011bd3e08] drop_caches_sysctl_handler+0x283 at ffffffff8124a743
#11 [ffff882011bd3e90] proc_sys_call_handler+0xd3 at ffffffff81260f03
#12 [ffff882011bd3ee8] proc_sys_write+0x14 at ffffffff81260f34
#13 [ffff882011bd3ef8] vfs_write+0xbd at ffffffff811e7bfd
#14 [ffff882011bd3f38] sys_write+0x7f at ffffffff811e869f
#15 [ffff882011bd3f80] system_call_fastpath+0x16 at ffffffff8165d709
    RIP: 00007ffff76d3500  RSP: 00007fffffffe180  RFLAGS: 00010206
    RAX: 0000000000000001  RBX: ffffffff8165d709  RCX: 0000000000000400
    RDX: 0000000000000002  RSI: 00007ffff7ff8000  RDI: 0000000000000001
    RBP: 00007ffff7ff8000   R8: 000000000000000a   R9: 00007ffff7fbd740
    R10: 00007fffffffe670  R11: 0000000000000246  R12: 0000000000000001
    R13: 0000000000000002  R14: 00007ffff79a7400  R15: 0000000000000002
    ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="38899">LU-8509</key>
            <summary>drop_caches hangs in cl_inode_fini()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="morrone">Christopher Morrone</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Wed, 17 Aug 2016 00:08:58 +0000</created>
                <updated>Mon, 6 Jan 2020 22:19:14 +0000</updated>
                            <resolved>Wed, 5 Oct 2016 11:57:07 +0000</resolved>
                                    <version>Lustre 2.8.0</version>
                                    <fixVersion>Lustre 2.9.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>13</watches>
                                                                            <comments>
                            <comment id="162180" author="pjones" created="Wed, 17 Aug 2016 15:03:44 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please assist with this issue?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="162335" author="bobijam" created="Thu, 18 Aug 2016 13:40:40 +0000"  >&lt;p&gt;Was the image built with --enable-lu_ref passed to configure? cl_inode_fini() is waiting for the lli_clob reference count to drop to 1, and it seems that another thread that referenced the object never released its reference.&lt;/p&gt;</comment>
                            <comment id="162410" author="morrone" created="Thu, 18 Aug 2016 18:04:56 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Do the image built with --enable-lu_ref defined in configure?&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;No, we are not setting that.&lt;/p&gt;</comment>
                            <comment id="162949" author="bobijam" created="Tue, 23 Aug 2016 23:13:22 +0000"  >&lt;p&gt;Can you dump the stack traces of all threads the next time this is hit? (echo t &amp;gt; /proc/sysrq-trigger)&lt;/p&gt;</comment>
                            <comment id="162953" author="morrone" created="Tue, 23 Aug 2016 23:51:37 +0000"  >&lt;p&gt;Perhaps the next time it happens.&lt;/p&gt;</comment>
                            <comment id="163008" author="amk" created="Wed, 24 Aug 2016 15:39:27 +0000"  >&lt;p&gt;Cray has hit this bug several times. apinit, one of the Cray workload manager daemons, hangs while dropping vm caches: echo 3 &amp;gt; /proc/sys/vm/drop_caches. apinit drops vm caches at the end of each job after dropping ldlm caches. When apinit fails to complete the vm drop_caches, the Node Health Checker (NHC) first marks the node suspect and then marks it admindown. In this state no new jobs are scheduled.&lt;/p&gt;

&lt;p&gt;apinit appears to be hung waiting for the loh_ref count of a cl_object to drop from 2 to 1.&lt;/p&gt;

&lt;p&gt;I uploaded a dump to ftp.intel.com:/uploads/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8509&quot; title=&quot;drop_caches hangs in cl_inode_fini()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8509&quot;&gt;&lt;del&gt;LU-8509&lt;/del&gt;&lt;/a&gt; in case it may be of some help.&lt;/p&gt;</comment>
                            <comment id="167380" author="gerrit" created="Mon, 26 Sep 2016 20:46:16 +0000"  >&lt;p&gt;Andrew Perepechko (andrew.perepechko@seagate.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/22743&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22743&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8509&quot; title=&quot;drop_caches hangs in cl_inode_fini()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8509&quot;&gt;&lt;del&gt;LU-8509&lt;/del&gt;&lt;/a&gt; tests: drop_caches hangs in cl_inode_fini()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 9b1a256b2cede7866fa7c86916ddebab88800ad0&lt;/p&gt;</comment>
                            <comment id="167387" author="gerrit" created="Mon, 26 Sep 2016 21:16:41 +0000"  >&lt;p&gt;Andrew Perepechko (andrew.perepechko@seagate.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/22745&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22745&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8509&quot; title=&quot;drop_caches hangs in cl_inode_fini()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8509&quot;&gt;&lt;del&gt;LU-8509&lt;/del&gt;&lt;/a&gt; llite: drop_caches hangs in cl_inode_fini()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 124b249c8ccaa4aba925916752d0a3fa51fda2f1&lt;/p&gt;</comment>
                            <comment id="168300" author="gerrit" created="Wed, 5 Oct 2016 03:51:30 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/22745/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22745/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8509&quot; title=&quot;drop_caches hangs in cl_inode_fini()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8509&quot;&gt;&lt;del&gt;LU-8509&lt;/del&gt;&lt;/a&gt; llite: drop_caches hangs in cl_inode_fini()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: c594026329e6a78a6c9f3188514211647b3040d8&lt;/p&gt;</comment>
                            <comment id="168321" author="pjones" created="Wed, 5 Oct 2016 11:57:07 +0000"  >&lt;p&gt;Landed for 2.9&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="42419">LU-8936</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="40903">LU-8743</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="40903">LU-8743</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzykyn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>