<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:19:35 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1777] open-by-fid: deadlock in lock_rename()</title>
                <link>https://jira.whamcloud.com/browse/LU-1777</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root]# /usr/src/lustre-release/lustre/tests/llmount.sh
[root]# cd /mnt/lustre/
[root]# mkdir sanity
[root]# chown sanity: sanity
[root]# su sanity
[sanity]$ pwd
/mnt/lustre
[sanity]$ sys_path2fid .
[0x61ab:0xef3d87c8:0x0]
[sanity]$ sys_rename sanity .lustre/fid/[0x61ab:0xef3d87c8:0x0]/sanity
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;rename() wedges in lock_rename().&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;INFO: task sys_rename:2960 blocked for more than 120 seconds.
&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
sys_rename    D 0000000000000000     0  2960   2933 0x00000080
 ffff88005cc37cf8 0000000000000082 ffff88005cc37d08 ffffffff81189a05
 0000001000000000 ffff88007b01cb70 ffffffff8100bc0e ffff88005cc37cf8
 ffff880062a67098 ffff88005cc37fd8 000000000000fb88 ffff880062a67098
Call Trace:
 [&amp;lt;ffffffff81189a05&amp;gt;] ? __link_path_walk+0x155/0x1030
 [&amp;lt;ffffffff8100bc0e&amp;gt;] ? apic_timer_interrupt+0xe/0x20
 [&amp;lt;ffffffff8104f18b&amp;gt;] ? mutex_spin_on_owner+0x9b/0xc0
 [&amp;lt;ffffffff814ff2fe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
 [&amp;lt;ffffffff814ff19b&amp;gt;] mutex_lock+0x2b/0x50
 [&amp;lt;ffffffff811878e3&amp;gt;] lock_rename+0x73/0xe0
 [&amp;lt;ffffffff8118af83&amp;gt;] sys_renameat+0x113/0x260
 [&amp;lt;ffffffff8119a470&amp;gt;] ? mntput_no_expire+0x30/0x110
 [&amp;lt;ffffffff8117cb11&amp;gt;] ? __fput+0x1a1/0x210
 [&amp;lt;ffffffff81142c7e&amp;gt;] ? remove_vma+0x6e/0x90
 [&amp;lt;ffffffff810d6b12&amp;gt;] ? audit_syscall_entry+0x272/0x2a0
 [&amp;lt;ffffffff815036de&amp;gt;] ? do_page_fault+0x3e/0xa0
 [&amp;lt;ffffffff8118b0eb&amp;gt;] sys_rename+0x1b/0x20
 [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root]# pidof sys_rename
2960
[root]# cat /proc/2960/stack
[&amp;lt;ffffffff811878e3&amp;gt;] lock_rename+0x73/0xe0
[&amp;lt;ffffffff8118af83&amp;gt;] sys_renameat+0x113/0x260
[&amp;lt;ffffffff8118b0eb&amp;gt;] sys_rename+0x1b/0x20
[&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
[&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment># uname -r&lt;br/&gt;
2.6.32-279.5.1.el6.x86_64&lt;br/&gt;
# cat /proc/fs/lustre/version &lt;br/&gt;
lustre: 2.2.93&lt;br/&gt;
kernel: patchless_client&lt;br/&gt;
build:  2.2.93-gbaaf628-PRISTINE-2.6.32-279.5.1.el6.x86_64</environment>
        <key id="15546">LU-1777</key>
            <summary>open-by-fid: deadlock in lock_rename()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="4" iconUrl="https://jira.whamcloud.com/images/icons/statuses/reopened.png" description="This issue was once resolved, but the resolution was deemed incorrect. From here issues are either marked assigned or resolved.">Reopened</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="jhammond">John Hammond</reporter>
                        <labels>
                            <label>open-by-fid</label>
                    </labels>
                <created>Tue, 21 Aug 2012 16:30:18 +0000</created>
                <updated>Thu, 20 Jul 2017 03:29:53 +0000</updated>
                                            <version>Lustre 2.4.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="43803" author="pjones" created="Mon, 27 Aug 2012 12:19:50 +0000"  >&lt;p&gt;Niu&lt;/p&gt;

&lt;p&gt;Could you please look at this one? It is similar to the work you just did for LU-1518.&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="43847" author="niu" created="Mon, 27 Aug 2012 23:48:41 +0000"  >&lt;p&gt;this should be fixed along with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1518&quot; title=&quot;Missing/bad operations in mdd_{obf,dot_lustre}_obj_op causing LBUGs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1518&quot;&gt;&lt;del&gt;LU-1518&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="43848" author="pjones" created="Mon, 27 Aug 2012 23:58:55 +0000"  >&lt;p&gt;OK, then let&apos;s close this ticket as a duplicate and just ensure that the LU-1518 fix covers this case also.&lt;/p&gt;</comment>
                            <comment id="44073" author="pjones" created="Sat, 1 Sep 2012 09:04:57 +0000"  >&lt;p&gt;As per John this was not fixed by the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1518&quot; title=&quot;Missing/bad operations in mdd_{obf,dot_lustre}_obj_op causing LBUGs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1518&quot;&gt;&lt;del&gt;LU-1518&lt;/del&gt;&lt;/a&gt; after all so reopening&lt;/p&gt;</comment>
                            <comment id="44091" author="niu" created="Mon, 3 Sep 2012 03:59:07 +0000"  >&lt;p&gt;I think there isn&apos;t a quick fix for such deadlock. We need some way on server side to detect the recursive rename, which should check the &apos;fid&apos; directory as well.&lt;/p&gt;

&lt;p&gt;Given that renaming files in the &apos;fid&apos; directory isn&apos;t a legal usage, I suggest we lower the priority of this ticket and fix it in a later version.&lt;/p&gt;</comment>
                            <comment id="44103" author="adilger" created="Mon, 3 Sep 2012 13:04:19 +0000"  >&lt;p&gt;While it would be good to get this fixed for 2.3, since this only affects the client and not the MDS, I&apos;m removing this as a blocker for 2.3 and moving it to 2.4.  This isn&apos;t a problem that can be hit accidentally. &lt;/p&gt;</comment>
                            <comment id="46307" author="niu" created="Wed, 10 Oct 2012 03:17:12 +0000"  >&lt;p&gt;The client deadlocks in lock_rename():&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
{
        struct dentry *p;

        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (p1 == p2) {
                mutex_lock_nested(&amp;amp;p1-&amp;gt;d_inode-&amp;gt;i_mutex, I_MUTEX_PARENT);
                &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; NULL;
        }

        mutex_lock(&amp;amp;p1-&amp;gt;d_inode-&amp;gt;i_sb-&amp;gt;s_vfs_rename_mutex);

        p = d_ancestor(p2, p1);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (p) {
                mutex_lock_nested(&amp;amp;p2-&amp;gt;d_inode-&amp;gt;i_mutex, I_MUTEX_PARENT);
                mutex_lock_nested(&amp;amp;p1-&amp;gt;d_inode-&amp;gt;i_mutex, I_MUTEX_CHILD);
                &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; p;
        }

        p = d_ancestor(p1, p2);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (p) {
                mutex_lock_nested(&amp;amp;p1-&amp;gt;d_inode-&amp;gt;i_mutex, I_MUTEX_PARENT);
                mutex_lock_nested(&amp;amp;p2-&amp;gt;d_inode-&amp;gt;i_mutex, I_MUTEX_CHILD);
                &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; p;
        }

        mutex_lock_nested(&amp;amp;p1-&amp;gt;d_inode-&amp;gt;i_mutex, I_MUTEX_PARENT);
        mutex_lock_nested(&amp;amp;p2-&amp;gt;d_inode-&amp;gt;i_mutex, I_MUTEX_CHILD);
        &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; NULL;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The root cause is that with &apos;fid&apos; directory, we can have two directory dentries pointing to the same inode on client, so lock_rename() will try to lock the same inode from two different dentries twice. Without patching kernel, I&apos;m not sure if there is any good way to solve it.&lt;/p&gt;

&lt;p&gt;Anyway, I don&apos;t think it should be a blocker for 2.4. Andreas, any comments? Thanks.&lt;/p&gt;</comment>
                            <comment id="46778" author="adilger" created="Fri, 19 Oct 2012 12:23:30 +0000"  >&lt;p&gt;Is it possible to block renames that involve the .lustre directory?&lt;/p&gt;</comment>
                            <comment id="46799" author="niu" created="Fri, 19 Oct 2012 21:48:11 +0000"  >&lt;p&gt;No, I don&apos;t think so. It block renames that involve the &apos;fid&apos; directory.&lt;/p&gt;</comment>
                            <comment id="122986" author="parinay" created="Mon, 3 Aug 2015 09:50:09 +0000"  >&lt;p&gt;@Niu Yawei,&lt;br/&gt;
I am facing the deadlock in lock_rename while running sanity/test_154a, open_by_fid test.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Jul 31 16:10:01 localhost kernel: mrename       D 0000000000000000     0 19394  19337 0x00000080
Jul 31 16:10:01 localhost kernel: ffff8800054f1cf8 0000000000000082 0000000000000000 ffffffff811850f5
Jul 31 16:10:01 localhost kernel: 0000000000000000 ffffea00001267b8 ffffffff8100bc0e ffff8800054f1cf8
Jul 31 16:10:01 localhost kernel: ffff88000dffc678 ffff8800054f1fd8 000000000000f4e8 ffff88000dffc678
Jul 31 16:10:01 localhost kernel: Call Trace:
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff811850f5&amp;gt;] ? __link_path_walk+0x155/0x1030
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff8100bc0e&amp;gt;] ? apic_timer_interrupt+0xe/0x20
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff8104d92d&amp;gt;] ? mutex_spin_on_owner+0x8d/0xc0
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff814eebbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff81183b01&amp;gt;] ? path_put+0x31/0x40
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff814eea5b&amp;gt;] mutex_lock+0x2b/0x50
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff81182f83&amp;gt;] lock_rename+0x73/0xe0
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff81186673&amp;gt;] sys_renameat+0x113/0x260
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff81195b70&amp;gt;] ? mntput_no_expire+0x30/0x110
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff81178271&amp;gt;] ? __fput+0x1a1/0x210
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff8113f43e&amp;gt;] ? remove_vma+0x6e/0x90
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff810d4932&amp;gt;] ? audit_syscall_entry+0x272/0x2a0
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff814f2fce&amp;gt;] ? do_page_fault+0x3e/0xa0
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff811867db&amp;gt;] sys_rename+0x1b/0x20
Jul 31 16:10:01 localhost kernel: [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This is reproducible  every time I run sanity/154a. This is lustre 2.1.5 ( esp with backports of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4279&quot; title=&quot;.lustre and .lustre/fid cannot by accessed by FID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4279&quot;&gt;&lt;del&gt;LU-4279&lt;/del&gt;&lt;/a&gt;, &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3245&quot; title=&quot;FS root OBF regression&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3245&quot;&gt;&lt;del&gt;LU-3245&lt;/del&gt;&lt;/a&gt; )&lt;br/&gt;
Can you provide some pointers so that I can work on the fix ?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;</comment>
                            <comment id="127746" author="vinayakh" created="Fri, 18 Sep 2015 04:30:55 +0000"  >&lt;p&gt;Hello Andreas, Niu Yawei, &lt;/p&gt;

&lt;p&gt;I have also faced this deadlock while renaming .lustre to .lustre using its FID, i.e.:&lt;/p&gt;

&lt;p&gt;echo &quot;rename .lustre to itself&quot;&lt;br/&gt;
    fid=$($LFS path2fid $DIR)&lt;br/&gt;
    mrename $DIR/.lustre $DIR/.lustre/fid/$fid/.lustre &amp;amp;&amp;amp;&lt;br/&gt;
       error &quot;rename .lustre to itself should fail.&quot;&lt;/p&gt;

&lt;p&gt;call trace.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt; &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
 mrename       D 0000000000000000     0 25974  25917 0x00000080
 ffff880014401cf8 0000000000000086 ffff880014401d08 ffffffff811850f5
 0000000000000000 ffffea00004fe840 ffffffff8100bc0e ffff880014401cf8
 ffff8800054afa78 ffff880014401fd8 000000000000f4e8 ffff8800054afa78
 Call Trace:
 [&amp;lt;ffffffff811850f5&amp;gt;] ? __link_path_walk+0x155/0x1030
 [&amp;lt;ffffffff8100bc0e&amp;gt;] ? apic_timer_interrupt+0xe/0x20
 [&amp;lt;ffffffff8104d92d&amp;gt;] ? mutex_spin_on_owner+0x8d/0xc0
 [&amp;lt;ffffffff814eebbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
 [&amp;lt;ffffffff81183b01&amp;gt;] ? path_put+0x31/0x40
 [&amp;lt;ffffffff814eea5b&amp;gt;] mutex_lock+0x2b/0x50
 [&amp;lt;ffffffff81182f83&amp;gt;] lock_rename+0x73/0xe0
 [&amp;lt;ffffffff81186673&amp;gt;] sys_renameat+0x113/0x260
 [&amp;lt;ffffffff81195b70&amp;gt;] ? mntput_no_expire+0x30/0x110
 [&amp;lt;ffffffff81178271&amp;gt;] ? __fput+0x1a1/0x210
 [&amp;lt;ffffffff8113f43e&amp;gt;] ? remove_vma+0x6e/0x90
 [&amp;lt;ffffffff810d4932&amp;gt;] ? audit_syscall_entry+0x272/0x2a0
 [&amp;lt;ffffffff814f2fce&amp;gt;] ? do_page_fault+0x3e/0xa0
 [&amp;lt;ffffffff811867db&amp;gt;] sys_rename+0x1b/0x20
 [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I have tried to catch this issue in the llite layer and return -EPERM from there, but was not successful. Is this case currently not supported by Lustre, or am I doing something wrong here?&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="14908">LU-1518</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzw2cv:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>10464</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>