<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:46:18 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-4840]  Deadlock when truncating file during lfs migrate</title>
                <link>https://jira.whamcloud.com/browse/LU-4840</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While migrating a file with &quot;lfs migrate&quot;, if a process tries to truncate the file, both lfs migrate and truncating processes will deadlock.&lt;/p&gt;

&lt;p&gt;This will result in both processes never finishing (unless it is killed) and watchdog messages saying that the processes did not progress for the last XXX seconds.&lt;/p&gt;

&lt;p&gt;Here is a reproducer:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@lustre24cli ~]# cat reproducer.sh
#!/bin/sh

FS=/test
FILE=${FS}/file

rm -f ${FILE}
# Create a file on OST 1 of size 512M
lfs setstripe -o 1 -c 1 ${FILE}
dd if=/dev/zero of=${FILE} bs=1M count=512

echo 3 &amp;gt; /proc/sys/vm/drop_caches

# Launch a migrate to OST 0 and a bit later open it for write
lfs migrate -i 0 --block ${FILE} &amp;amp;
sleep 2
dd if=/dev/zero of=${FILE} bs=1M count=512 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Once the last dd tries to open the file, both lfs and dd processes stay forever with this stack:&lt;/p&gt;

&lt;p&gt;lfs stack:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[&amp;lt;ffffffff8128e864&amp;gt;] call_rwsem_down_read_failed+0x14/0x30
[&amp;lt;ffffffffa08d98dd&amp;gt;] ll_file_io_generic+0x29d/0x600 [lustre]
[&amp;lt;ffffffffa08d9d7f&amp;gt;] ll_file_aio_read+0x13f/0x2c0 [lustre]
[&amp;lt;ffffffffa08da61c&amp;gt;] ll_file_read+0x16c/0x2a0 [lustre]
[&amp;lt;ffffffff811896b5&amp;gt;] vfs_read+0xb5/0x1a0
[&amp;lt;ffffffff811897f1&amp;gt;] sys_read+0x51/0x90
[&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
[&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;dd stack:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[&amp;lt;ffffffffa03436fe&amp;gt;] cfs_waitq_wait+0xe/0x10 [libcfs]
[&amp;lt;ffffffffa04779fa&amp;gt;] cl_lock_state_wait+0x1aa/0x320 [obdclass]
[&amp;lt;ffffffffa04781eb&amp;gt;] cl_enqueue_locked+0x15b/0x1f0 [obdclass]
[&amp;lt;ffffffffa0478d6e&amp;gt;] cl_lock_request+0x7e/0x270 [obdclass]
[&amp;lt;ffffffffa047e00c&amp;gt;] cl_io_lock+0x3cc/0x560 [obdclass]
[&amp;lt;ffffffffa047e242&amp;gt;] cl_io_loop+0xa2/0x1b0 [obdclass]
[&amp;lt;ffffffffa092a8c8&amp;gt;] cl_setattr_ost+0x208/0x2c0 [lustre]
[&amp;lt;ffffffffa08f8a0e&amp;gt;] ll_setattr_raw+0x9ce/0x1000 [lustre]
[&amp;lt;ffffffffa08f909b&amp;gt;] ll_setattr+0x5b/0xf0 [lustre]
[&amp;lt;ffffffff811a7348&amp;gt;] notify_change+0x168/0x340
[&amp;lt;ffffffff81187074&amp;gt;] do_truncate+0x64/0xa0
[&amp;lt;ffffffff8119bcc1&amp;gt;] do_filp_open+0x861/0xd20
[&amp;lt;ffffffff81185d39&amp;gt;] do_sys_open+0x69/0x140
[&amp;lt;ffffffff81185e50&amp;gt;] sys_open+0x20/0x30
[&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
[&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="23983">LU-4840</key>
            <summary> Deadlock when truncating file during lfs migrate</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="patrick.valentin">Patrick Valentin</reporter>
                        <labels>
                            <label>cea</label>
                    </labels>
                <created>Mon, 31 Mar 2014 13:41:52 +0000</created>
                <updated>Sun, 9 Oct 2016 13:52:48 +0000</updated>
                            <resolved>Mon, 14 Sep 2015 17:32:22 +0000</resolved>
                                    <version>Lustre 2.4.2</version>
                                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>20</watches>
                                                                            <comments>
                            <comment id="80628" author="pjones" created="Mon, 31 Mar 2014 16:50:25 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please comment?&lt;/p&gt;

&lt;p&gt;thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="80721" author="green" created="Tue, 1 Apr 2014 17:30:07 +0000"  >&lt;p&gt;So I tried the script on master and it also happens there.&lt;/p&gt;</comment>
                            <comment id="80727" author="bogl" created="Tue, 1 Apr 2014 17:52:47 +0000"  >&lt;p&gt;I can also reproduce it on master.  The exact details of the lfs and dd task stacks are a little different but the deadlock is still there.&lt;/p&gt;</comment>
                            <comment id="80915" author="bobijam" created="Thu, 3 Apr 2014 03:59:03 +0000"  >&lt;p&gt;It relates to the lfs migrate implementation (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2445&quot; title=&quot;add &amp;quot;lfs migrate&amp;quot; support&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2445&quot;&gt;&lt;del&gt;LU-2445&lt;/del&gt;&lt;/a&gt;), lfs migrate first takes a group lock to limit concurrent OST access from other clients, then vfs reads and writes the file to migrate data from OSTs to OSTs, at last drops the group lock.&lt;/p&gt;

&lt;p&gt;In this case, lfs migrate gets the group lock, then the other client from the same node tries to truncate the file which takes the inode truncate semaphore (lli_trunc_sem) and enqueues OST lock and waits for it to be granted.&lt;/p&gt;

&lt;p&gt;When the lfs migrate comes to the read phase, it tries to get the truncate semaphore as well.&lt;/p&gt;

&lt;p&gt;The other client cannot get its OST lock granted, since OST cannot revoke it from the lfs migrate process. Deadlock happens.&lt;/p&gt;
</comment>
                            <comment id="80917" author="jay" created="Thu, 3 Apr 2014 05:08:25 +0000"  >&lt;p&gt;It&apos;s a good chance to reimplement migration with open lease.&lt;/p&gt;

&lt;p&gt;Aurelien and JC, do you have any inputs on this?&lt;/p&gt;</comment>
                            <comment id="81320" author="adegremont" created="Wed, 9 Apr 2014 20:17:38 +0000"  >&lt;p&gt;I see no objection to replacing grouplock-based code with an open-lease mechanism. Current code was using this lock because open lease did not exist at that time and we were advised to do it this way as there was no mechanism to protect from concurrent access. I think it will be cleaner.&lt;/p&gt;</comment>
                            <comment id="81360" author="jay" created="Thu, 10 Apr 2014 13:36:15 +0000"  >&lt;p&gt;That&apos;s true, Aurelien. Since the migration was implemented by CEA, would it be possible for CEA to pick it up again to reimplement it with open lease?&lt;/p&gt;

&lt;p&gt;Jinshan&lt;/p&gt;</comment>
                            <comment id="81830" author="adegremont" created="Thu, 17 Apr 2014 14:40:37 +0000"  >&lt;p&gt;OK, CEA will do it. &lt;br/&gt;
Could you confirm some behaviour of open lease to be sure we are in line. Is there only an exclusive open lease for now? When are leases revoked by concurrent access?&lt;/p&gt;</comment>
                            <comment id="81945" author="hdoreau" created="Fri, 18 Apr 2014 14:38:28 +0000"  >&lt;p&gt;Here&apos;s a patch aimed to solve it:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/10013&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10013&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="82114" author="jay" created="Tue, 22 Apr 2014 00:20:27 +0000"  >&lt;p&gt;Use file lease to implement migration.&lt;/p&gt;</comment>
                            <comment id="82115" author="jay" created="Tue, 22 Apr 2014 00:23:32 +0000"  >&lt;p&gt;Please check the attachment for the implementation of migration.&lt;/p&gt;

&lt;p&gt;The procedure is a little bit like HSM release where close and swap layout should be an atomic operation. Also, you need to check if the lease is valid in the middle of data copying periodically therefore data copying can abort if the file is being opened by others.&lt;/p&gt;

&lt;p&gt;Please take a look at it and I&apos;ll be happy to answer questions.&lt;/p&gt;</comment>
                            <comment id="82127" author="hdoreau" created="Tue, 22 Apr 2014 08:44:41 +0000"  >&lt;p&gt;Thanks Jinshan.&lt;/p&gt;

&lt;p&gt;I still have a question regarding when to check data version. In which case could it fail if we both get and check it under the file lease? Also swap layout will perform a dataversion check. Is the following sequence flawed?&lt;/p&gt;

&lt;ul&gt;
	&lt;li&gt;open source file&lt;/li&gt;
	&lt;li&gt;get lease&lt;/li&gt;
	&lt;li&gt;open volatile&lt;/li&gt;
	&lt;li&gt;get data version&lt;/li&gt;
	&lt;li&gt;copy file content&lt;/li&gt;
	&lt;li&gt;put lease&lt;/li&gt;
	&lt;li&gt;swap layout (pass in data version for check)&lt;/li&gt;
	&lt;li&gt;close volatile&lt;/li&gt;
	&lt;li&gt;close source file&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="82239" author="jay" created="Wed, 23 Apr 2014 05:21:52 +0000"  >&lt;p&gt;We have to make `put lease&apos;, `swap layout&apos;, and `close source file&apos; in one atomic operation. Otherwise, if the source is opened for writing after `put lease&apos; and generate some dirty pages, it will produce data corruption.&lt;/p&gt;</comment>
                            <comment id="82248" author="hdoreau" created="Wed, 23 Apr 2014 08:16:23 +0000"  >&lt;p&gt;Cf. the new patchset.&lt;/p&gt;

&lt;p&gt;Indeed, I do see the race window between llapi_lease_put() and llapi_fswap_layouts() but I can&apos;t see any userland API that would allow us to get rid of it. Am I missing something? You&apos;re stressing the need to do operations atomically, do you have something in mind, like making the SWAP_LAYOUT ioctl lease-aware?&lt;/p&gt;</comment>
                            <comment id="82357" author="jay" created="Thu, 24 Apr 2014 05:48:52 +0000"  >&lt;p&gt;No, there is no API in user space ready to use.&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;You&apos;re stressing the need to do operations atomically, do you have something in mind, like making the SWAP_LAYOUT ioctl lease-aware?&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Yes, similar. Instead of making SWAP_LAYOUT lease-aware, what we need is to make lease SWAP-LAYOUT aware.&lt;/p&gt;

&lt;p&gt;When we release a lease, a CLOSE RPC will be sent to MDT. We&apos;re going to pack the FID of volatile file into the RPC, with a special bias (similar to MDS_HSM_RELEASE, please check ll_close_inode_openhandle() for &apos;op_data-&amp;gt;op_bias |= MDS_HSM_RELEASE&apos;, and mdt_hsm_release()), for example, MDS_CLOSE_SWAP_LAYOUT to tell MDT that we want to unlock the release and swap the layout. We&apos;re going to extend `struct close_data&apos; to include those information. And the ioctl() of LL_IOC_SET_LEASE with F_UNLCK will be extended as well.&lt;/p&gt;

&lt;p&gt;It&apos;s totally fine for me to write a design but I think you have understood the problem and are capable of doing it yourself, based on your questions above.&lt;/p&gt;</comment>
                            <comment id="85692" author="hdoreau" created="Wed, 4 Jun 2014 13:17:33 +0000"  >&lt;p&gt;It took me a little while but I buckled down and just pushed a patch that follows the guidelines you gave me. In userland I&apos;ve extended the swap layouts ioctl to leverage the existing API and preserve compatibility (I was reluctant to change the parameters of the set lease ioctl). In kernel land it&apos;s very close to hsm_release. Hope this is fine conceptually.&lt;/p&gt;</comment>
                            <comment id="89745" author="hdoreau" created="Tue, 22 Jul 2014 16:45:27 +0000"  >&lt;p&gt;Any update on this?&lt;/p&gt;</comment>
                            <comment id="89748" author="jay" created="Tue, 22 Jul 2014 16:57:03 +0000"  >&lt;p&gt;I&apos;ll look at the patch this week, really sorry for delay.&lt;/p&gt;</comment>
                            <comment id="98489" author="fzago" created="Thu, 6 Nov 2014 00:28:55 +0000"  >&lt;p&gt;I tested rev 12 of this patch on top of head of tree. Nothing fancy:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;./llmount.sh
cd /mnt/lustre
cp /bin/ls .
 ~/lustre-release/lustre/utils/lfs migrate -o 0 ls
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;After lfs decided to output the content of ls (???) to stdout, the node proceeded to not like me and crash.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; bt
PID: 6463   TASK: ffff88007ac1b500  CPU: 1   COMMAND: &quot;lfs&quot;
 #0 [ffff880012b89820] machine_kexec at ffffffff81038f3b
 #1 [ffff880012b89880] crash_kexec at ffffffff810c5b62
 #2 [ffff880012b89950] oops_end at ffffffff8152c8a0
 #3 [ffff880012b89980] no_context at ffffffff8104a00b
 #4 [ffff880012b899d0] __bad_area_nosemaphore at ffffffff8104a295
 #5 [ffff880012b89a20] bad_area at ffffffff8104a3be
 #6 [ffff880012b89a50] __do_page_fault at ffffffff8104ab6f
 #7 [ffff880012b89b70] do_page_fault at ffffffff8152e7ee
 #8 [ffff880012b89ba0] page_fault at ffffffff8152bba5
    [exception RIP: ll_mdscapa_get+65]
    RIP: ffffffffa08cdc91  RSP: ffff880012b89c58  RFLAGS: 00010286
    RAX: 0000000000000000  RBX: ffff88000ccf9200  RCX: 0000000000000000
    RDX: 0000000000000001  RSI: ffff88001fb0e138  RDI: ffff88007c8303d8
    RBP: ffff880012b89c68   R8: 0000000000000000   R9: 0000000000000000
    R10: ffff88000ccf9200  R11: 0000000000000200  R12: ffff88007c8303d8
    R13: ffff88007c8303d8  R14: 0000000000000000  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #9 [ffff880012b89c70] ll_prep_md_op_data at ffffffffa08a0e45 [lustre]
#10 [ffff880012b89ce0] ll_prepare_close at ffffffffa0881098 [lustre]
#11 [ffff880012b89d30] ll_close_inode_openhandle at ffffffffa088a6f2 [lustre]
#12 [ffff880012b89db0] ll_file_ioctl at ffffffffa0894fc8 [lustre]
#13 [ffff880012b89e60] vfs_ioctl at ffffffff8119e422
#14 [ffff880012b89ea0] do_vfs_ioctl at ffffffff8119e8ea
#15 [ffff880012b89f30] sys_ioctl at ffffffff8119eb41
#16 [ffff880012b89f80] system_call_fastpath at ffffffff8100b072
    RIP: 0000003a522e0b37  RSP: 00007fff083e48d0  RFLAGS: 00010292
    RAX: 0000000000000010  RBX: ffffffff8100b072  RCX: 00000000545ab5fe
    RDX: 00007fff083e48c0  RSI: 00000000402066db  RDI: 0000000000000003
    RBP: 0000000000000001   R8: 000000002e1aa2e5   R9: 0000000000000010
    R10: 0000000000000000  R11: 0000000000000246  R12: 000000002e1aa2e5
    R13: 0000000000000000  R14: 0000000000000000  R15: 0000000000000003
    ORIG_RAX: 0000000000000010  CS: 0033  SS: 002b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Without this patch, I still get the junk output, but not the crash.&lt;/p&gt;





</comment>
                            <comment id="98514" author="hdoreau" created="Thu, 6 Nov 2014 13:41:48 +0000"  >&lt;p&gt;Thanks Frank. Null pointer (sbi) dereference in ll_mdscapa_get(). Fixed in patchset #13. The file content ending up in the console remains unexplained to me so far. You said it was present before, is there an open ticket for that?&lt;/p&gt;</comment>
                            <comment id="98525" author="fzago" created="Thu, 6 Nov 2014 16:57:48 +0000"  >&lt;p&gt;Thanks. The fix works, and I can migrate a file between osts now.&lt;/p&gt;

&lt;p&gt;Regarding the junk output, I found the bug in llapi_file_open_param(). I&apos;ll submit a patch soon.&lt;/p&gt;</comment>
                            <comment id="98651" author="hdoreau" created="Fri, 7 Nov 2014 12:37:10 +0000"  >&lt;p&gt;Follow-up patch, fixes numerous issues with the first one: &lt;a href=&quot;http://review.whamcloud.com/#/c/12616/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/12616/&lt;/a&gt;&lt;br/&gt;
Both patches can be merged if need be, just let me know what&apos;s preferred.&lt;/p&gt;</comment>
                            <comment id="98685" author="adilger" created="Fri, 7 Nov 2014 19:15:37 +0000"  >&lt;p&gt;Henri, I agree with Frank that we should not be landing a patch with significant known defects, since this would break the code for anyone testing this. Please merge the patches. &lt;/p&gt;</comment>
                            <comment id="99506" author="paf" created="Tue, 18 Nov 2014 19:36:45 +0000"  >&lt;p&gt;One advantage to the old approach of using group locks for migration was that it was theoretically possible to create a version of lfs migrate that could migrate a file in parallel using multiple clients.  Is this still possible with the new approach?&lt;/p&gt;</comment>
                            <comment id="99554" author="hdoreau" created="Wed, 19 Nov 2014 08:23:22 +0000"  >&lt;p&gt;Yes, it is still possible. Though an early version of the patch removed grouplock-protected migration, it has now been re-introduced. Migration can be either grouplock-protected and blocking (as before), or based on exclusive open and non-blocking (would safely abort if a concurrent process opens the file). We would need file leases to provide a notion of &quot;group&quot; to be able to implement non-blocking parallel migration too.&lt;/p&gt;</comment>
                            <comment id="99618" author="paf" created="Wed, 19 Nov 2014 19:33:33 +0000"  >&lt;p&gt;Thanks for the response, Henri.  I&apos;m glad to hear the group lock option was retained, and I see the deadlock with truncate was resolved as well.&lt;/p&gt;</comment>
                            <comment id="102649" author="green" created="Tue, 6 Jan 2015 18:10:03 +0000"  >&lt;p&gt;Just to draw attention to my comment in gerrit.&lt;br/&gt;
The latest patch still deadlocks in racer on mds, also seems to be leaking ost locks at times?&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;8.832781] LNet: Service thread pid 26108 was inactive for 62.00s. The thread mig
8.833657] Pid: 26108, comm: mdt00_007
8.833906] 
8.833907] Call Trace:
8.834350]  [&amp;lt;ffffffffa13be503&amp;gt;] ? _ldlm_lock_debug+0x2e3/0x670 [ptlrpc]
8.834649]  [&amp;lt;ffffffff81516894&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
8.834934]  [&amp;lt;ffffffff81514231&amp;gt;] schedule_timeout+0x191/0x2e0
8.835310]  [&amp;lt;ffffffff81081e50&amp;gt;] ? process_timeout+0x0/0x10
8.835629]  [&amp;lt;ffffffffa13decf0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc
8.836070]  [&amp;lt;ffffffffa13e3841&amp;gt;] ldlm_completion_ast+0x5e1/0x9b0 [ptlrpc]
8.836267]  [&amp;lt;ffffffff8105de00&amp;gt;] ? default_wake_function+0x0/0x20
8.836475]  [&amp;lt;ffffffffa13e2c8e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x7f0 [ptlrpc]
8.836735]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
8.836950]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
8.837190]  [&amp;lt;ffffffffa0574805&amp;gt;] mdt_object_local_lock+0x3c5/0xa80 [mdt]
8.837391]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
8.837638]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
8.837852]  [&amp;lt;ffffffffa0575245&amp;gt;] mdt_object_lock_internal+0x65/0x360 [mdt]
8.838074]  [&amp;lt;ffffffffa0575604&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
8.838265]  [&amp;lt;ffffffffa059328a&amp;gt;] mdt_reint_unlink+0x20a/0x10c0 [mdt]
8.838485]  [&amp;lt;ffffffffa120fa80&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
8.838676]  [&amp;lt;ffffffffa056ad25&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
8.838898]  [&amp;lt;ffffffffa05858bc&amp;gt;] ? mdt_root_squash+0x2c/0x3f0 [mdt]
8.839232]  [&amp;lt;ffffffffa1434e02&amp;gt;] ? __req_capsule_get+0x162/0x6d0 [ptlrpc]
8.839566]  [&amp;lt;ffffffffa0589aad&amp;gt;] mdt_reint_rec+0x5d/0x200 [mdt]
8.839881]  [&amp;lt;ffffffffa056f5ab&amp;gt;] mdt_reint_internal+0x4cb/0x7a0 [mdt]
8.840205]  [&amp;lt;ffffffffa056fe0b&amp;gt;] mdt_reint+0x6b/0x120 [mdt]
8.840550]  [&amp;lt;ffffffffa146e85e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
8.840915]  [&amp;lt;ffffffffa141fd64&amp;gt;] ptlrpc_main+0xdf4/0x1940 [ptlrpc]
8.841304]  [&amp;lt;ffffffffa141ef70&amp;gt;] ? ptlrpc_main+0x0/0x1940 [ptlrpc]
8.841624]  [&amp;lt;ffffffff81098c06&amp;gt;] kthread+0x96/0xa0
8.841917]  [&amp;lt;ffffffff8100c24a&amp;gt;] child_rip+0xa/0x20
8.842199]  [&amp;lt;ffffffff81098b70&amp;gt;] ? kthread+0x0/0xa0
8.842492]  [&amp;lt;ffffffff8100c240&amp;gt;] ? child_rip+0x0/0x20
8.842780] 
8.842997] LustreError: dumping log to /tmp/lustre-log.1420492523.26108
9.015282] Pid: 9643, comm: mdt00_006
9.015565] 
9.015566] Call Trace:
9.016033]  [&amp;lt;ffffffffa13be503&amp;gt;] ? _ldlm_lock_debug+0x2e3/0x670 [ptlrpc]
9.017088]  [&amp;lt;ffffffff81516894&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
9.017352]  [&amp;lt;ffffffff81514231&amp;gt;] schedule_timeout+0x191/0x2e0
9.017687]  [&amp;lt;ffffffff81081e50&amp;gt;] ? process_timeout+0x0/0x10
9.018074]  [&amp;lt;ffffffffa13decf0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc
9.018593]  [&amp;lt;ffffffffa13e3841&amp;gt;] ldlm_completion_ast+0x5e1/0x9b0 [ptlrpc]
9.018880]  [&amp;lt;ffffffff8105de00&amp;gt;] ? default_wake_function+0x0/0x20
9.019196]  [&amp;lt;ffffffffa13e2c8e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x7f0 [ptlrpc]
9.019615]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
9.019940]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
9.020220]  [&amp;lt;ffffffffa05745fb&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
9.020558]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
9.020860]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
9.021150]  [&amp;lt;ffffffffa0575245&amp;gt;] mdt_object_lock_internal+0x65/0x360 [mdt]
9.021477]  [&amp;lt;ffffffffa0575604&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
9.022987]  [&amp;lt;ffffffffa05806fc&amp;gt;] mdt_getattr_name_lock+0x103c/0x1ab0 [mdt]
9.023297]  [&amp;lt;ffffffff8128863a&amp;gt;] ? strlcpy+0x4a/0x60
9.023573]  [&amp;lt;ffffffffa140ff84&amp;gt;] ? lustre_msg_get_flags+0x34/0xb0 [ptlrpc]
9.023888]  [&amp;lt;ffffffffa14116d0&amp;gt;] ? lustre_swab_ldlm_reply+0x0/0x40 [ptlrpc]
9.024182]  [&amp;lt;ffffffffa0581692&amp;gt;] mdt_intent_getattr+0x292/0x470 [mdt]
9.024493]  [&amp;lt;ffffffffa056e064&amp;gt;] mdt_intent_policy+0x494/0xce0 [mdt]
9.024789]  [&amp;lt;ffffffffa13c305f&amp;gt;] ldlm_lock_enqueue+0x12f/0x950 [ptlrpc]
9.025140]  [&amp;lt;ffffffffa10b9201&amp;gt;] ? cfs_hash_for_each_enter+0x1/0xa0 [libcfs]
9.025454]  [&amp;lt;ffffffffa13eedeb&amp;gt;] ldlm_handle_enqueue0+0x51b/0x13e0 [ptlrpc]
9.025747]  [&amp;lt;ffffffffa146dc72&amp;gt;] tgt_enqueue+0x62/0x1d0 [ptlrpc]
9.026616]  [&amp;lt;ffffffffa146e85e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
9.026970]  [&amp;lt;ffffffffa141fd64&amp;gt;] ptlrpc_main+0xdf4/0x1940 [ptlrpc]
9.027313]  [&amp;lt;ffffffffa141ef70&amp;gt;] ? ptlrpc_main+0x0/0x1940 [ptlrpc]
9.027622]  [&amp;lt;ffffffff81098c06&amp;gt;] kthread+0x96/0xa0
9.028084]  [&amp;lt;ffffffff8100c24a&amp;gt;] child_rip+0xa/0x20
9.029247] Pid: 6818, comm: mdt01_002
9.029453] 
9.029453] Call Trace:
9.029739]  [&amp;lt;ffffffffa13be503&amp;gt;] ? _ldlm_lock_debug+0x2e3/0x670 [ptlrpc]
9.029980]  [&amp;lt;ffffffff81516894&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
9.030164]  [&amp;lt;ffffffff81514231&amp;gt;] schedule_timeout+0x191/0x2e0
9.030341]  [&amp;lt;ffffffff81081e50&amp;gt;] ? process_timeout+0x0/0x10
9.030579]  [&amp;lt;ffffffffa13decf0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc
9.031058]  [&amp;lt;ffffffffa13e3841&amp;gt;] ldlm_completion_ast+0x5e1/0x9b0 [ptlrpc]
9.031335]  [&amp;lt;ffffffff8105de00&amp;gt;] ? default_wake_function+0x0/0x20
9.031630]  [&amp;lt;ffffffffa13e2c8e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x7f0 [ptlrpc]
9.032198]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
9.032491]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
9.032773]  [&amp;lt;ffffffffa05745fb&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
9.033094]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
9.033387]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
9.033675]  [&amp;lt;ffffffffa0575245&amp;gt;] mdt_object_lock_internal+0x65/0x360 [mdt]
9.033988]  [&amp;lt;ffffffffa0575604&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
9.034264]  [&amp;lt;ffffffffa05806fc&amp;gt;] mdt_getattr_name_lock+0x103c/0x1ab0 [mdt]
9.034544]  [&amp;lt;ffffffff8128863a&amp;gt;] ? strlcpy+0x4a/0x60
9.034891]  [&amp;lt;ffffffffa140ff84&amp;gt;] ? lustre_msg_get_flags+0x34/0xb0 [ptlrpc]
9.035203]  [&amp;lt;ffffffffa14116d0&amp;gt;] ? lustre_swab_ldlm_reply+0x0/0x40 [ptlrpc]
9.035495]  [&amp;lt;ffffffffa0581692&amp;gt;] mdt_intent_getattr+0x292/0x470 [mdt]
9.035774]  [&amp;lt;ffffffffa056e064&amp;gt;] mdt_intent_policy+0x494/0xce0 [mdt]
9.036192]  [&amp;lt;ffffffffa13c305f&amp;gt;] ldlm_lock_enqueue+0x12f/0x950 [ptlrpc]
9.036479]  [&amp;lt;ffffffffa10b9201&amp;gt;] ? cfs_hash_for_each_enter+0x1/0xa0 [libcfs]
9.036788]  [&amp;lt;ffffffffa13eedeb&amp;gt;] ldlm_handle_enqueue0+0x51b/0x13e0 [ptlrpc]
9.037136]  [&amp;lt;ffffffffa146dc72&amp;gt;] tgt_enqueue+0x62/0x1d0 [ptlrpc]
9.037429]  [&amp;lt;ffffffffa146e85e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
9.037735]  [&amp;lt;ffffffffa141fd64&amp;gt;] ptlrpc_main+0xdf4/0x1940 [ptlrpc]
9.039938]  [&amp;lt;ffffffffa141ef70&amp;gt;] ? ptlrpc_main+0x0/0x1940 [ptlrpc]
9.040213]  [&amp;lt;ffffffff81098c06&amp;gt;] kthread+0x96/0xa0
9.040456]  [&amp;lt;ffffffff8100c24a&amp;gt;] child_rip+0xa/0x20
9.040707]  [&amp;lt;ffffffff81098b70&amp;gt;] ? kthread+0x0/0xa0
9.040980]  [&amp;lt;ffffffff8100c240&amp;gt;] ? child_rip+0x0/0x20
9.041230] 
9.041415] Pid: 6815, comm: mdt00_002
9.041637] 
9.041638] Call Trace:
9.042070]  [&amp;lt;ffffffffa13be503&amp;gt;] ? _ldlm_lock_debug+0x2e3/0x670 [ptlrpc]
9.042351]  [&amp;lt;ffffffff81516894&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
9.042615]  [&amp;lt;ffffffff81514231&amp;gt;] schedule_timeout+0x191/0x2e0
9.042890]  [&amp;lt;ffffffff81081e50&amp;gt;] ? process_timeout+0x0/0x10
9.043183]  [&amp;lt;ffffffffa13decf0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc
9.043656]  [&amp;lt;ffffffffa13e3841&amp;gt;] ldlm_completion_ast+0x5e1/0x9b0 [ptlrpc]
9.044075]  [&amp;lt;ffffffff8105de00&amp;gt;] ? default_wake_function+0x0/0x20
9.044366]  [&amp;lt;ffffffffa13e2c8e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x7f0 [ptlrpc]
9.044677]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
9.045145]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
9.045420]  [&amp;lt;ffffffffa05745fb&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
9.045705]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
9.046019]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
9.046309]  [&amp;lt;ffffffffa0575245&amp;gt;] mdt_object_lock_internal+0x65/0x360 [mdt]
9.046596]  [&amp;lt;ffffffffa0575604&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
9.046873]  [&amp;lt;ffffffffa05806fc&amp;gt;] mdt_getattr_name_lock+0x103c/0x1ab0 [mdt]
9.047152]  [&amp;lt;ffffffff8128863a&amp;gt;] ? strlcpy+0x4a/0x60
9.047427]  [&amp;lt;ffffffffa140ff84&amp;gt;] ? lustre_msg_get_flags+0x34/0xb0 [ptlrpc]
9.047736]  [&amp;lt;ffffffffa14116d0&amp;gt;] ? lustre_swab_ldlm_reply+0x0/0x40 [ptlrpc]
9.048031]  [&amp;lt;ffffffffa0581692&amp;gt;] mdt_intent_getattr+0x292/0x470 [mdt]
9.048308]  [&amp;lt;ffffffffa056e064&amp;gt;] mdt_intent_policy+0x494/0xce0 [mdt]
9.048604]  [&amp;lt;ffffffffa13c305f&amp;gt;] ldlm_lock_enqueue+0x12f/0x950 [ptlrpc]
9.048918]  [&amp;lt;ffffffffa10b9201&amp;gt;] ? cfs_hash_for_each_enter+0x1/0xa0 [libcfs]
9.049232]  [&amp;lt;ffffffffa13eedeb&amp;gt;] ldlm_handle_enqueue0+0x51b/0x13e0 [ptlrpc]
9.049544]  [&amp;lt;ffffffffa146dc72&amp;gt;] tgt_enqueue+0x62/0x1d0 [ptlrpc]
9.049852]  [&amp;lt;ffffffffa146e85e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
9.050160]  [&amp;lt;ffffffffa141fd64&amp;gt;] ptlrpc_main+0xdf4/0x1940 [ptlrpc]
9.050453]  [&amp;lt;ffffffffa141ef70&amp;gt;] ? ptlrpc_main+0x0/0x1940 [ptlrpc]
9.050720]  [&amp;lt;ffffffff81098c06&amp;gt;] kthread+0x96/0xa0
9.050970]  [&amp;lt;ffffffff8100c24a&amp;gt;] child_rip+0xa/0x20
9.051895] Pid: 6817, comm: mdt01_001
9.052114] 
9.052114] Call Trace:
9.052817]  [&amp;lt;ffffffffa13be503&amp;gt;] ? _ldlm_lock_debug+0x2e3/0x670 [ptlrpc]
9.053127]  [&amp;lt;ffffffff81516894&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
9.053391]  [&amp;lt;ffffffff81514231&amp;gt;] schedule_timeout+0x191/0x2e0
9.053653]  [&amp;lt;ffffffff81081e50&amp;gt;] ? process_timeout+0x0/0x10
9.053952]  [&amp;lt;ffffffffa13decf0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc
9.054484]  [&amp;lt;ffffffffa13e3841&amp;gt;] ldlm_completion_ast+0x5e1/0x9b0 [ptlrpc]
9.054768]  [&amp;lt;ffffffff8105de00&amp;gt;] ? default_wake_function+0x0/0x20
9.055087]  [&amp;lt;ffffffffa13e2c8e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x7f0 [ptlrpc]
9.055394]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
9.055697]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
9.056558]  [&amp;lt;ffffffffa05745fb&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
9.056893]  [&amp;lt;ffffffffa056bbc0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
9.057247]  [&amp;lt;ffffffffa13e3260&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
9.057488]  [&amp;lt;ffffffffa0575245&amp;gt;] mdt_object_lock_internal+0x65/0x360 [mdt]
9.057687]  [&amp;lt;ffffffffa0575604&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
9.057949]  [&amp;lt;ffffffffa05806fc&amp;gt;] mdt_getattr_name_lock+0x103c/0x1ab0 [mdt]
9.058223]  [&amp;lt;ffffffff8128863a&amp;gt;] ? strlcpy+0x4a/0x60
9.058530]  [&amp;lt;ffffffffa140ff84&amp;gt;] ? lustre_msg_get_flags+0x34/0xb0 [ptlrpc]
9.058752]  [&amp;lt;ffffffffa14116d0&amp;gt;] ? lustre_swab_ldlm_reply+0x0/0x40 [ptlrpc]
9.058958]  [&amp;lt;ffffffffa0581692&amp;gt;] mdt_intent_getattr+0x292/0x470 [mdt]
9.059174]  [&amp;lt;ffffffffa056e064&amp;gt;] mdt_intent_policy+0x494/0xce0 [mdt]
9.059398]  [&amp;lt;ffffffffa13c305f&amp;gt;] ldlm_lock_enqueue+0x12f/0x950 [ptlrpc]
9.059629]  [&amp;lt;ffffffffa10b9201&amp;gt;] ? cfs_hash_for_each_enter+0x1/0xa0 [libcfs]
9.059858]  [&amp;lt;ffffffffa13eedeb&amp;gt;] ldlm_handle_enqueue0+0x51b/0x13e0 [ptlrpc]
9.060079]  [&amp;lt;ffffffffa146dc72&amp;gt;] tgt_enqueue+0x62/0x1d0 [ptlrpc]
9.060286]  [&amp;lt;ffffffffa146e85e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
9.060501]  [&amp;lt;ffffffffa141fd64&amp;gt;] ptlrpc_main+0xdf4/0x1940 [ptlrpc]
9.060710]  [&amp;lt;ffffffffa141ef70&amp;gt;] ? ptlrpc_main+0x0/0x1940 [ptlrpc]
9.060914]  [&amp;lt;ffffffff81098c06&amp;gt;] kthread+0x96/0xa0
9.061085]  [&amp;lt;ffffffff8100c24a&amp;gt;] child_rip+0xa/0x20
9.061275]  [&amp;lt;ffffffff81098b70&amp;gt;] ? kthread+0x0/0xa0
9.061443]  [&amp;lt;ffffffff8100c240&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="102672" author="adilger" created="Tue, 6 Jan 2015 18:59:37 +0000"  >&lt;p&gt;Dropping this from Blocker to Critical, since it is not a new issue for 2.7.0 (it exists since migrate was added in 2.4.0), and only affects a subset of users of the migrate functionality, and not anyone else.&lt;/p&gt;</comment>
                            <comment id="102674" author="jay" created="Tue, 6 Jan 2015 19:03:46 +0000"  >&lt;p&gt;I&apos;m investigating this issue.&lt;/p&gt;</comment>
                            <comment id="103214" author="jay" created="Mon, 12 Jan 2015 18:30:20 +0000"  >&lt;p&gt;please apply &lt;a href=&quot;http://review.whamcloud.com/13344&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13344&lt;/a&gt; to your tree. It worked well after that patch was applied in my test.&lt;/p&gt;</comment>
                            <comment id="103777" author="fzago" created="Fri, 16 Jan 2015 19:38:47 +0000"  >&lt;p&gt;Patch that adds some tests for the new API: &lt;a href=&quot;http://review.whamcloud.com/13441/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13441/&lt;/a&gt;&lt;br/&gt;
It has a couple questions left (see BUG?? in source) but otherwise is complete.&lt;br/&gt;
It has to be applied on top on Henri&apos;s patch.&lt;/p&gt;</comment>
                            <comment id="104058" author="green" created="Tue, 20 Jan 2015 19:35:36 +0000"  >&lt;p&gt;for the record: using Jinshan&apos;s patch did not help all that much and I was still seeing deadlocks on mdt&lt;/p&gt;</comment>
                            <comment id="104067" author="jay" created="Tue, 20 Jan 2015 20:01:47 +0000"  >&lt;p&gt;I couldn&apos;t reproduce the deadlock problem on MDT. Please collect a core dump when you see the deadlock issue again.&lt;/p&gt;</comment>
                            <comment id="105576" author="adilger" created="Tue, 3 Feb 2015 19:30:43 +0000"  >&lt;p&gt;Frank, Henri, Jinshan,&lt;br/&gt;
according to Oleg&apos;s last comments, he was still able to hit this deadlock even when the patch was applied, which raises a concern whether the benefit of landing this complex patch is worth the risk at this late stage.&lt;/p&gt;

&lt;p&gt;Could you please confirm that the current patch has resolved the deadlock in your testing?  It may be that Oleg is hitting a second issue that is not directly related. &lt;/p&gt;

&lt;p&gt;The second question is whether you are currently running with this patch in your other testing and can confirm that it doesn&apos;t introduce other problems?&lt;/p&gt;</comment>
                            <comment id="105593" author="jay" created="Tue, 3 Feb 2015 20:13:10 +0000"  >&lt;p&gt;1. I can confirm that the original problem is fixed by this patch;&lt;br/&gt;
2. I tried to reproduce the problem mentioned by Oleg, i.e., running racer, for over 10 hours but I couldn&apos;t reproduce the problem. I&apos;m not saying that this issue doesn&apos;t exist, but that it is hard to reproduce. &lt;/p&gt;</comment>
                            <comment id="105650" author="hdoreau" created="Wed, 4 Feb 2015 08:27:30 +0000"  >&lt;p&gt;Hello,&lt;/p&gt;

&lt;p&gt;same here, I have not been able to reproduce the issues aforementioned... I can try investigating from crash dumps if Oleg has any that he can share, though it&apos;s indeed harder than with a reproducer.&lt;/p&gt;

&lt;p&gt;This patch introduces no regression I&apos;m aware of, and it fixes the original problem. If its size/complexity makes it &quot;unlandable&quot; I can try to split it into two subpatches (one fixing the blocking mode, another one adding the non-blocking mode). I was actually not expecting it to grow that much &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;

&lt;p&gt;Edit: I have just triggered deadlocks with racer, as described by Oleg, &lt;b&gt;with all rpms installed&lt;/b&gt;, not simply running from the source tree. There was concurrent activity going on, but I&apos;ll try to reproduce in a non-disturbed environment.&lt;/p&gt;</comment>
                            <comment id="105704" author="jay" created="Wed, 4 Feb 2015 18:29:38 +0000"  >&lt;p&gt;Henri, that&apos;s great. Please let me know if you need any help from me.&lt;/p&gt;</comment>
                            <comment id="105708" author="green" created="Wed, 4 Feb 2015 18:51:49 +0000"  >&lt;p&gt;So in order to move things forward, and based on my understanding that this patch actually helps some user-reported problems (and also on the assumption that the racer problems are now possibly a separate bug) - let&apos;s split the patch into two parts: the actual fix and the racer test patch.&lt;/p&gt;

&lt;p&gt;We can then land the code fix (if it otherwise does not introduce any more failures) and we can wait with the racer test patch until we better understand why it fails and fix those. I am a bit nervous about landing tests that tend to fail as that invalidates our testing strategy quite a bit though.&lt;/p&gt;</comment>
                            <comment id="106001" author="gerrit" created="Fri, 6 Feb 2015 09:18:30 +0000"  >&lt;p&gt;Henri Doreau (henri.doreau@cea.fr) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13669&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13669&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4840&quot; title=&quot; Deadlock when truncating file during lfs migrate&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4840&quot;&gt;&lt;del&gt;LU-4840&lt;/del&gt;&lt;/a&gt; tests: Add file migration to racer&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 739d73d58b2877711893e2df0b5f487e6b141060&lt;/p&gt;</comment>
                            <comment id="106003" author="hdoreau" created="Fri, 6 Feb 2015 09:23:18 +0000"  >&lt;p&gt;Racer patch: &lt;a href=&quot;http://review.whamcloud.com/#/c/13669&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/13669&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="116737" author="gerrit" created="Thu, 28 May 2015 19:00:21 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/10013/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10013/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4840&quot; title=&quot; Deadlock when truncating file during lfs migrate&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4840&quot;&gt;&lt;del&gt;LU-4840&lt;/del&gt;&lt;/a&gt; lfs: Use file lease to implement migration&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 85bd36cc69563d7a79e3ed34f8fadb4ed1a72b7c&lt;/p&gt;</comment>
                            <comment id="117432" author="sebastien.buisson" created="Thu, 4 Jun 2015 13:32:10 +0000"  >&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;Now that patch at &lt;a href=&quot;http://review.whamcloud.com/10013&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10013&lt;/a&gt; has been merged into master, is it possible to have a backport to b2_5?&lt;br/&gt;
Indeed, the original issue was met on one of our customer clusters running Lustre 2.4, that was updated afterwards in Lustre 2.5.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="125920" author="adilger" created="Tue, 1 Sep 2015 18:30:45 +0000"  >&lt;p&gt;Closing &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4840&quot; title=&quot; Deadlock when truncating file during lfs migrate&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4840&quot;&gt;&lt;del&gt;LU-4840&lt;/del&gt;&lt;/a&gt; since the patches here are landed to fix the problems described here, except &lt;a href=&quot;http://review.whamcloud.com/13669&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13669&lt;/a&gt; to enable racer with object migrate.  &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7073&quot; title=&quot;racer with OST object migration hangs on cleanup&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7073&quot;&gt;&lt;del&gt;LU-7073&lt;/del&gt;&lt;/a&gt; has been opened to track the new racer + object migrate failure.&lt;/p&gt;</comment>
                            <comment id="159888" author="riauxjb" created="Tue, 26 Jul 2016 15:10:22 +0000"  >&lt;p&gt;Backport to b2_7_fe &lt;a href=&quot;http://review.whamcloud.com/#/c/21513/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/21513/&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="31216">LU-6903</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="30886">LU-6785</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="27576">LU-5915</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="31828">LU-7073</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="14758" name="migration.png" size="15274" author="jay" created="Tue, 22 Apr 2014 00:20:27 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwiv3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>13336</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>