<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:38:04 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-3919] deadlock on MDT, possibly related to quota?</title>
                <link>https://jira.whamcloud.com/browse/LU-3919</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We had a deadlock recently on an MDS at NOAA. The threads were mostly waiting for a mutex in mds_lookup and for the quota master lock in target_handle_dqacq_callback, with some other ldiskfs lock waiters in there too. &lt;/p&gt;

&lt;p&gt;I will attach the bt, bt -f, log, and kern.log files. Let me know if there is anything else I should get from the vmcore.&lt;/p&gt;</description>
                <environment></environment>
        <key id="20884">LU-3919</key>
            <summary>deadlock on MDT, possibly related to quota?</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="2">Won&apos;t Fix</resolution>
                                        <assignee username="niu">Niu Yawei</assignee>
                                    <reporter username="orentas">Oz Rentas</reporter>
                        <labels>
                    </labels>
                <created>Tue, 10 Sep 2013 23:07:36 +0000</created>
                <updated>Thu, 23 Jun 2016 08:12:21 +0000</updated>
                            <resolved>Thu, 23 Jun 2016 08:12:21 +0000</resolved>
                                    <version>Lustre 1.8.9</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="66278" author="pjones" created="Tue, 10 Sep 2013 23:10:15 +0000"  >&lt;p&gt;Niu&lt;/p&gt;

&lt;p&gt;Could you please look into this?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="66296" author="niu" created="Wed, 11 Sep 2013 02:37:14 +0000"  >&lt;p&gt;Kit, that should be enough. Thanks.&lt;/p&gt;</comment>
                            <comment id="66452" author="niu" created="Thu, 12 Sep 2013 03:42:52 +0000"  >&lt;p&gt;Here is the deadlock:&lt;/p&gt;

&lt;p&gt;One thread is holding the inode lock of __iopen__ and then tries to start the journal:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;PID: 23587  TASK: ffff81082c0be860  CPU: 6   COMMAND: &lt;span class=&quot;code-quote&quot;&gt;&quot;ll_mdt_29&quot;&lt;/span&gt;
 #0 [ffff8108231ad2b0] schedule at ffffffff80062fa0
 #1 [ffff8108231ad388] start_this_handle at ffffffff8b269d2d
 #2 [ffff8108231ad408] jbd2_journal_start at ffffffff8b269ea4
 #3 [ffff8108231ad428] ldiskfs_dquot_drop at ffffffff8b2aedbe
 #4 [ffff8108231ad448] clear_inode at ffffffff800234e7
 #5 [ffff8108231ad458] dispose_list at ffffffff800352d6
 #6 [ffff8108231ad488] shrink_icache_memory at ffffffff8002dcfb
 #7 [ffff8108231ad4c8] shrink_slab at ffffffff8003f7cd
 #8 [ffff8108231ad508] zone_reclaim at ffffffff800cf9ae
 #9 [ffff8108231ad5b8] get_page_from_freelist at ffffffff8000a8a7
#10 [ffff8108231ad628] __alloc_pages at ffffffff8000f48a
#11 [ffff8108231ad698] cache_grow at ffffffff80017a52
#12 [ffff8108231ad6e8] cache_alloc_refill at ffffffff8005c3ee
#13 [ffff8108231ad728] kmem_cache_alloc at ffffffff8000ac96
#14 [ffff8108231ad748] d_alloc at ffffffff80022de3
#15 [ffff8108231ad778] __lookup_hash at ffffffff80037210
#16 [ffff8108231ad7b8] lookup_one_len at ffffffff800ed617
#17 [ffff8108231ad7d8] mds_lookup at ffffffff8b367ba4
#18 [ffff8108231ad858] mds_fid2dentry at ffffffff8b359693
#19 [ffff8108231ad8f8] mds_fid2locked_dentry at ffffffff8b35b482
#20 [ffff8108231ad9b8] mds_getattr_lock at ffffffff8b35bd67
#21 [ffff8108231adb28] mds_intent_policy at ffffffff8b362453
#22 [ffff8108231adbd8] ldlm_lock_enqueue at ffffffff8aff6eb6
#23 [ffff8108231adc68] ldlm_handle_enqueue at ffffffff8b018b29
#24 [ffff8108231add08] mds_handle at ffffffff8b361210
#25 [ffff8108231ade18] ptlrpc_server_handle_request at ffffffff8b046874
#26 [ffff8108231adeb8] ptlrpc_main at ffffffff8b047f16
#27 [ffff8108231adf48] kernel_thread at ffffffff8005dfc1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;p&gt;Meanwhile, another thread has started the journal and then tries to take the inode lock of __iopen__:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;PID: 5862   TASK: ffff81123f6e20c0  CPU: 22  COMMAND: &lt;span class=&quot;code-quote&quot;&gt;&quot;ll_mdt_rdpg_14&quot;&lt;/span&gt;
 #0 [ffff8112247dd2c0] schedule at ffffffff80062fa0
 #1 [ffff8112247dd398] __mutex_lock_slowpath at ffffffff80063c63
 #2 [ffff8112247dd3d8] .text.lock.mutex at ffffffff80063cad (via mutex_lock)
 #3 [ffff8112247dd3f8] mds_lookup at ffffffff8b367b97
 #4 [ffff8112247dd478] mds_fid2dentry at ffffffff8b359693
 #5 [ffff8112247dd518] mds_lvfs_fid2dentry at ffffffff8b359a5d
 #6 [ffff8112247dd538] llog_lvfs_create at ffffffff8af6782e
 #7 [ffff8112247dd5c8] llog_cat_current_log at ffffffff8af5fffa
 #8 [ffff8112247dd6a8] llog_cat_add_rec at ffffffff8af6217d
 #9 [ffff8112247dd718] llog_obd_origin_add at ffffffff8af68438
#10 [ffff8112247dd768] llog_add at ffffffff8af68cf6
#11 [ffff8112247dd7c8] lov_llog_origin_add at ffffffff8b182494
#12 [ffff8112247dd858] llog_add at ffffffff8af68cf6
#13 [ffff8112247dd8b8] mds_llog_origin_add at ffffffff8b341bc5
#14 [ffff8112247dd928] llog_add at ffffffff8af68cf6
#15 [ffff8112247dd988] mds_llog_add_unlink at ffffffff8b34173d
#16 [ffff8112247dda08] mds_log_op_unlink at ffffffff8b344c8d
#17 [ffff8112247dda98] mds_mfd_close at ffffffff8b384a21
#18 [ffff8112247ddbf8] mds_close at ffffffff8b38c3d0
#19 [ffff8112247ddd08] mds_handle at ffffffff8b35f67b
#20 [ffff8112247dde18] ptlrpc_server_handle_request at ffffffff8b046874
#21 [ffff8112247ddeb8] ptlrpc_main at ffffffff8b047f16
#22 [ffff8112247ddf48] kernel_thread at ffffffff8005dfc1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="66702" author="niu" created="Mon, 16 Sep 2013 03:23:44 +0000"  >&lt;p&gt;This kind of deadlock only happens when memory is very tight; I can&apos;t think of a good solution for it so far. Maybe we should just return -ENOMEM and fail the getattr operation instead of triggering the shrinker in such a case?&lt;/p&gt;</comment>
                            <comment id="66764" author="kitwestneat" created="Mon, 16 Sep 2013 18:27:18 +0000"  >&lt;p&gt;Hi Niu,&lt;/p&gt;

&lt;p&gt;Would it be possible to return ENOMEM where the lock is held, run the shrinker outside the lock, and then retry the lookup?&lt;/p&gt;

&lt;p&gt;Thanks.&lt;/p&gt;</comment>
                            <comment id="106778" author="gerrit" created="Thu, 12 Feb 2015 07:40:14 +0000"  >&lt;p&gt;Niu Yawei (yawei.niu@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13743&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13743&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3919&quot; title=&quot;deadlock on MDT, possibly related to quota?&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3919&quot;&gt;&lt;del&gt;LU-3919&lt;/del&gt;&lt;/a&gt; lvfs: fid2dentry violates locking order&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b1_8&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 1b5c23c2ab0b2c416782e94772ff1f42817a1df3&lt;/p&gt;</comment>
                            <comment id="156639" author="niu" created="Thu, 23 Jun 2016 08:12:21 +0000"  >&lt;p&gt;Close old 1.8 issue.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="13458" name="lu-3919.tar.gz" size="721466" author="kitwestneat" created="Wed, 11 Sep 2013 13:42:57 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Thu, 23 Apr 2015 23:07:36 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzw1p3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>10356</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 10 Sep 2013 23:07:36 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>