<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:28:32 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2827] mdt_intent_fixup_resent() cannot find the proper lock in hash</title>
                <link>https://jira.whamcloud.com/browse/LU-2827</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;If a successful reply is lost for an intent lock request, the MDS will not correctly recover from this situation on resend.&lt;/p&gt;

&lt;p&gt;The cause seems to be the code of ldlm_handle_enqueue0() and mdt_intent_fixup_resent():&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; ldlm_handle_enqueue0(struct ldlm_namespace *ns,
                         struct ptlrpc_request *req,
                         &lt;span class=&quot;code-keyword&quot;&gt;const&lt;/span&gt; struct ldlm_request *dlm_req,
                         &lt;span class=&quot;code-keyword&quot;&gt;const&lt;/span&gt; struct ldlm_callback_suite *cbs)
{
...
        &lt;span class=&quot;code-comment&quot;&gt;/* The lock&apos;s callback data might be set in the policy function */&lt;/span&gt;
        lock = ldlm_lock_create(ns, &amp;amp;dlm_req-&amp;gt;lock_desc.l_resource.lr_name,
                                dlm_req-&amp;gt;lock_desc.l_resource.lr_type,
                                dlm_req-&amp;gt;lock_desc.l_req_mode,
                                cbs, NULL, 0);
...
        lock-&amp;gt;l_export = class_export_lock_get(req-&amp;gt;rq_export, lock);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (lock-&amp;gt;l_export-&amp;gt;exp_lock_hash) {
                cfs_hash_add(lock-&amp;gt;l_export-&amp;gt;exp_lock_hash,
                             &amp;amp;lock-&amp;gt;l_remote_handle,
                             &amp;amp;lock-&amp;gt;l_exp_hash);
        }
...
        err = ldlm_lock_enqueue(ns, &amp;amp;lock, cookie, &amp;amp;flags);
...
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; void mdt_intent_fixup_resent(struct mdt_thread_info *info,
                                    struct ldlm_lock *new_lock,
                                    struct ldlm_lock **old_lock,
                                    struct mdt_lock_handle *lh)
{
        struct ptlrpc_request  *req = mdt_info_req(info);
        struct obd_export      *exp = req-&amp;gt;rq_export;
        struct lustre_handle    remote_hdl;
        struct ldlm_request    *dlmreq;
        struct ldlm_lock       *lock;

        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!(lustre_msg_get_flags(req-&amp;gt;rq_reqmsg) &amp;amp; MSG_RESENT))
                &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt;;

        dlmreq = req_capsule_client_get(info-&amp;gt;mti_pill, &amp;amp;RMF_DLM_REQ);
        remote_hdl = dlmreq-&amp;gt;lock_handle[0];

        lock = cfs_hash_lookup(exp-&amp;gt;exp_lock_hash, &amp;amp;remote_hdl);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (lock) {
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (lock != new_lock) {
...
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;On resend, ldlm_handle_enqueue0() adds the new lock into the export&apos;s lock hash even though there is already a granted lock with the same remote handle. mdt_intent_fixup_resent() will then find the newly added lock in the hash and ignore it, since it is the new lock itself. This leads to an enqueue request on the newly created lock, a deadlock, and client eviction.&lt;/p&gt;

&lt;p&gt;Alexey thinks that the problem has existed since we moved away from the correct code:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; void fixup_handle_for_resent_req(struct ptlrpc_request *req, &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; offset,
                                        struct ldlm_lock *new_lock,
                                        struct ldlm_lock **old_lock,
                                        struct lustre_handle *lockh)
{
        struct obd_export *exp = req-&amp;gt;rq_export;
        struct ldlm_request *dlmreq =
                lustre_msg_buf(req-&amp;gt;rq_reqmsg, offset, sizeof(*dlmreq));
        struct lustre_handle remote_hdl = dlmreq-&amp;gt;lock_handle[0];
        struct list_head *iter;

        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!(lustre_msg_get_flags(req-&amp;gt;rq_reqmsg) &amp;amp; MSG_RESENT))
                &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt;;

        spin_lock(&amp;amp;exp-&amp;gt;exp_ldlm_data.led_lock);
        list_for_each(iter, &amp;amp;exp-&amp;gt;exp_ldlm_data.led_held_locks) {
                struct ldlm_lock *lock;
                lock = list_entry(iter, struct ldlm_lock, l_export_chain);
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (lock == new_lock)
                        &lt;span class=&quot;code-keyword&quot;&gt;continue&lt;/span&gt;; &amp;lt;==================== N.B.
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (lock-&amp;gt;l_remote_handle.cookie == remote_hdl.cookie) {
                        lockh-&amp;gt;cookie = lock-&amp;gt;l_handle.h_cookie;
                        LDLM_DEBUG(lock, &lt;span class=&quot;code-quote&quot;&gt;&quot;restoring lock cookie&quot;&lt;/span&gt;);
                        DEBUG_REQ(D_DLMTRACE, req,&lt;span class=&quot;code-quote&quot;&gt;&quot;restoring lock cookie &quot;&lt;/span&gt;LPX64,
                                  lockh-&amp;gt;cookie);
                        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (old_lock)
                                *old_lock = LDLM_LOCK_GET(lock);
                        spin_unlock(&amp;amp;exp-&amp;gt;exp_ldlm_data.led_lock);
                        &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt;;
                }
        }
...
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
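
&lt;p&gt;One possible direction, sketched here only for discussion (the helper name find_resent_lock is hypothetical, and it assumes the caller holds the resource lock): scan the resource&apos;s granted list for a lock with the same remote handle while explicitly skipping the lock that ldlm_handle_enqueue0() just created for the resent request, mirroring the skip in the old code above:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;/* Sketch only, not a landed patch: find an already-granted lock with the
 * same remote handle, ignoring the duplicate created for the resent RPC. */
static struct ldlm_lock *find_resent_lock(struct ldlm_resource *res,
                                          struct ldlm_lock *new_lock,
                                          const struct lustre_handle *remote_hdl)
{
        struct ldlm_lock *lock;

        list_for_each_entry(lock, &amp;amp;res-&amp;gt;lr_granted, l_res_link) {
                if (lock == new_lock)
                        continue; /* the duplicate added on resend */
                if (lock-&amp;gt;l_remote_handle.cookie == remote_hdl-&amp;gt;cookie)
                        return LDLM_LOCK_GET(lock); /* caller drops the ref */
        }
        return NULL;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;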

&lt;p&gt;Logs for this issue will follow.&lt;/p&gt;</description>
                <environment></environment>
        <key id="17609">LU-2827</key>
            <summary>mdt_intent_fixup_resent() cannot find the proper lock in hash</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="yujian">Jian Yu</assignee>
                                    <reporter username="panda">Andrew Perepechko</reporter>
                        <labels>
                            <label>LB</label>
                            <label>llnl</label>
                    </labels>
                <created>Mon, 18 Feb 2013 14:00:01 +0000</created>
                <updated>Mon, 2 Nov 2015 13:41:20 +0000</updated>
                            <resolved>Fri, 20 Jun 2014 05:03:00 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                    <fixVersion>Lustre 2.5.4</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>24</watches>
                                                                            <comments>
                            <comment id="52649" author="panda" created="Mon, 18 Feb 2013 16:08:48 +0000"  >&lt;p&gt;Logs for the issue (xid 1427344285761632)&lt;/p&gt;</comment>
                            <comment id="52650" author="panda" created="Mon, 18 Feb 2013 16:09:54 +0000"  >&lt;p&gt;The original patch presumably introducing the issue: &lt;a href=&quot;https://bugzilla.lustre.org/show_bug.cgi?id=16777&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://bugzilla.lustre.org/show_bug.cgi?id=16777&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52927" author="shadow" created="Sat, 23 Feb 2013 12:57:50 +0000"  >&lt;p&gt;per xyratex internal discussion - change cfs_hash to insert lock in tail instead of head looks enough as short term fix, but if we have a too much resends we may double hash size and that will produce speed down for lookup operation - so.. we need look to better solution.&lt;/p&gt;</comment>
                            <comment id="53013" author="keith" created="Tue, 26 Feb 2013 02:46:15 +0000"  >&lt;p&gt;Under what conditions are you seeing a successful reply being lost for an intent lock request?  How can the problem be recreated?&lt;/p&gt;</comment>
                            <comment id="53052" author="panda" created="Tue, 26 Feb 2013 20:27:58 +0000"  >&lt;p&gt;A reply can be lost because of a network error.&lt;/p&gt;

&lt;p&gt;A simple reproducer for this issue, I believe, is:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[root@panda tests]# touch /mnt/lustre/file
[root@panda tests]# lctl set_param fail_loc=0x8000030c
fail_loc=0x8000030c
[root@panda tests]# setfattr -n user.attr -v value /mnt/lustre/file
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="53064" author="keith" created="Tue, 26 Feb 2013 22:27:02 +0000"  >&lt;p&gt;The network should not loose things.  Can you be more specific?&lt;/p&gt;</comment>
                            <comment id="53108" author="shadow" created="Wed, 27 Feb 2013 08:11:43 +0000"  >&lt;p&gt;Keith,&lt;/p&gt;

&lt;p&gt;you really think the network should not lose anything? &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;

&lt;p&gt;PS. LNet does not guarantee packet delivery or delivery order. The network may flap or lose packets.. it&apos;s &lt;em&gt;NORMAL&lt;/em&gt;.&lt;/p&gt;
</comment>
                            <comment id="53110" author="green" created="Wed, 27 Feb 2013 08:14:30 +0000"  >&lt;p&gt;Well, it is true that ideally network should not lose anything. In practice, though, packets do get lost sometimes, and we must deal with this unfortunate fact of life correctly.&lt;/p&gt;

&lt;p&gt;The exact reason why a reply might have been lost is not as important here (in the case of the reproducer it is lost deliberately, as a test).&lt;/p&gt;</comment>
                            <comment id="53387" author="keith" created="Tue, 5 Mar 2013 15:38:09 +0000"  >&lt;p&gt;I think &quot;lost&quot; just means different things to different people &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;  I didn&apos;t mean to derail any discussion about mdt_intent_fixup_resent.  &lt;/p&gt;

&lt;p&gt;As step one on this issue I will work to add a test. &lt;/p&gt;</comment>
                            <comment id="53613" author="adilger" created="Fri, 8 Mar 2013 14:09:15 +0000"  >&lt;p&gt;Panda, do you already have a patch for this bug?  Could you please push it to Gerrit?&lt;/p&gt;</comment>
                            <comment id="53748" author="keith" created="Mon, 11 Mar 2013 22:05:04 +0000"  >&lt;p&gt;I don&apos;t seem to be able to trigger a deadlock and client eviction in my local test vms.  &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@panda tests]# touch /mnt/lustre/file
[root@panda tests]# lctl set_param fail_loc=0x8000030c
fail_loc=0x8000030c
[root@panda tests]# setfattr -n user.attr -v value /mnt/lustre/file
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I tried adding more I/O but I was not able to trigger any form of deadlock or eviction. &lt;/p&gt;

&lt;p&gt;Would you mind writing a Lustre test for the deadlock and eviction that you see? It will help to address the issue. &lt;/p&gt;</comment>
                            <comment id="53782" author="panda" created="Tue, 12 Mar 2013 06:20:32 +0000"  >&lt;p&gt;What does dmesg show after running setfattr?&lt;/p&gt;</comment>
                            <comment id="53798" author="panda" created="Tue, 12 Mar 2013 13:15:32 +0000"  >&lt;p&gt;&amp;gt; Panda, do you already have a patch for this bug? Could you please push it to Gerrit?&lt;/p&gt;

&lt;p&gt;No, I don&apos;t have a patch. I could try coding one if we agree on how it can best be fixed. I can&apos;t agree with Alexey that changing the hash to link new entries at the tail instead of the head is a good solution.&lt;/p&gt;</comment>
                            <comment id="53808" author="keith" created="Tue, 12 Mar 2013 14:47:19 +0000"  >&lt;p&gt;I don&apos;t have those logs at this point that VMs setup is pretty busy right now. &lt;/p&gt;

&lt;p&gt;I don&apos;t recall seeing anything about setfattr in the dmesg. &lt;br/&gt;
I recall there was something about a lost connection with the MDS (I slept for a while) and then an MDS reconnection when fail_loc went back to 0.  &lt;/p&gt;

&lt;p&gt;What do you see on your system? How do you trigger this issue?&lt;/p&gt;

</comment>
                            <comment id="53956" author="keith" created="Wed, 13 Mar 2013 17:52:25 +0000"  >&lt;p&gt;Lucky day.  I was cleaning up some old terminals and I found the output from one of my reproduction experiments. &lt;/p&gt;

&lt;p&gt;I created a test, replay-single test 91.  I think there was a 60-second sleep in this version before reverting the fail_loc status.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: == replay-single test 91: time out lock replay (3764) == 18:48:14 (1363052894)
Lustre: 3258:0:(client.c:1866:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1363052895/real 1363052895]  req@ffff88003ba76000 x1429244805151980/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff88003d8e2c00@192.168.56.103@tcp:12/10 lens 576/1080 e 0 to 1 dl 1363052902 ref 2 fl Rpc:X/0/ffffffff rc 0/-1
Lustre: lustre-MDT0000-mdc-ffff88003d8e2c00: Connection to lustre-MDT0000 (at 192.168.56.103@tcp) was lost; in progress operations using this service will wait for recovery to complete
Lustre: lustre-MDT0000-mdc-ffff88003d8e2c00: Connection restored to lustre-MDT0000 (at 192.168.56.103@tcp)
Lustre: DEBUG MARKER: == replay-single test complete, duration 62 sec == 18:49:12 (1363052952)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Can you provide your debug log or test for this issue?  I should have some machine cycles tomorrow if you have further suggestions on how to reproduce the issue. &lt;/p&gt;</comment>
                            <comment id="53980" author="panda" created="Thu, 14 Mar 2013 02:46:21 +0000"  >&lt;p&gt;The defect clearly reproduces for me with WC repository commit 9fb5d1ff837880a7596d98f24ff6bb1bf0033f2b:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[root@panda tests]# bash llmount.sh 
Stopping clients: panda /mnt/lustre (opts:)
Stopping clients: panda /mnt/lustre2 (opts:)
Loading modules from /mnt/nfs/xyratex/lustre-release/lustre/tests/..
detected 4 online CPUs by sysfs
Force libcfs to create 2 CPU partitions
../libcfs/libcfs/libcfs options: &lt;span class=&quot;code-quote&quot;&gt;&apos;cpu_npartitions=2&apos;&lt;/span&gt;
debug=vfstrace rpctrace dlmtrace neterror ha config ioctl &lt;span class=&quot;code-keyword&quot;&gt;super&lt;/span&gt;
subsystem_debug=all -lnet -lnd -pinger
gss/krb5 is not supported
quota/lquota options: &lt;span class=&quot;code-quote&quot;&gt;&apos;hash_lqs_cur_bits=3&apos;&lt;/span&gt;
Formatting mgs, mds, osts
Format mds1: /tmp/lustre-mdt1
Format ost1: /tmp/lustre-ost1
Format ost2: /tmp/lustre-ost2
Checking servers environments
Checking clients panda environments
Loading modules from /mnt/nfs/xyratex/lustre-release/lustre/tests/..
detected 4 online CPUs by sysfs
Force libcfs to create 2 CPU partitions
debug=vfstrace rpctrace dlmtrace neterror ha config ioctl &lt;span class=&quot;code-keyword&quot;&gt;super&lt;/span&gt;
subsystem_debug=all -lnet -lnd -pinger
gss/krb5 is not supported
Setup mgs, mdt, osts
Starting mds1:   -o loop /tmp/lustre-mdt1 /mnt/mds1
Started lustre-MDT0000
Starting ost1:   -o loop /tmp/lustre-ost1 /mnt/ost1
Started lustre-OST0000
Starting ost2:   -o loop /tmp/lustre-ost2 /mnt/ost2
Started lustre-OST0001
Starting client: panda: -o user_xattr,flock panda@tcp:/lustre /mnt/lustre
Using TIMEOUT=20
enable jobstats, set job scheduler as procname_uid
Waiting 90 secs &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; update
Updated after 2s: wanted &lt;span class=&quot;code-quote&quot;&gt;&apos;procname_uid&apos;&lt;/span&gt; got &lt;span class=&quot;code-quote&quot;&gt;&apos;procname_uid&apos;&lt;/span&gt;
disable quota as required
[root@panda tests]# cd /mnt/lustre
[root@panda lustre]# touch /mnt/lustre/file
[root@panda lustre]# lctl set_param fail_loc=0x8000030c
fail_loc=0x8000030c
[root@panda lustre]# setfattr -n user.attr -v value /mnt/lustre/file
[root@panda lustre]# dmesg
Lustre: Mounted lustre-client
Lustre: DEBUG MARKER: Using TIMEOUT=20
Lustre: ctl-lustre-MDT0000: &lt;span class=&quot;code-keyword&quot;&gt;super&lt;/span&gt;-sequence allocation rc = 0 [0x0000000200000400-0x0000000240000400):0:mdt
Lustre: *** cfs_fail_loc=30c, val=2147483648***
LustreError: 30243:0:(ldlm_lib.c:2422:target_send_reply_msg()) @@@ dropping reply  req@ffff88007d7d6850 x1429464107975760/t0(0) o101-&amp;gt;ab287eba-76b5-0c4e-723e-377c1d2b0157@0@lo:0/0 lens 576/624 e 0 to 0 dl 1363243290 ref 1 fl Interpret:/0/0 rc 0/0
Lustre: 30711:0:(client.c:1866:ptlrpc_expire_one_request()) @@@ Request sent has timed out &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; slow reply: [sent 1363243284/real 1363243284]  req@ffff88006b6db400 x1429464107975760/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff88006b6db800@0@lo:12/10 lens 576/1048 e 0 to 1 dl 1363243291 ref 2 fl Rpc:X/0/ffffffff rc 0/-1
Lustre: lustre-MDT0000-mdc-ffff88006b6db800: Connection to lustre-MDT0000 (at 0@lo) was lost; in progress operations using &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; service will wait &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; recovery to complete
Lustre: lustre-MDT0000: Client ab287eba-76b5-0c4e-723e-377c1d2b0157 (at 0@lo) reconnecting
Lustre: lustre-MDT0000-mdc-ffff88006b6db800: Connection restored to lustre-MDT0000 (at 0@lo)
LNet: Service thread pid 30243 was inactive &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; 40.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; debugging purposes:
Pid: 30243, comm: mdt01_001

Call Trace:
 [&amp;lt;ffffffff814ead32&amp;gt;] schedule_timeout+0x192/0x2e0
 [&amp;lt;ffffffff8107cb50&amp;gt;] ? process_timeout+0x0/0x10
 [&amp;lt;ffffffffa0fe46d1&amp;gt;] cfs_waitq_timedwait+0x11/0x20 [libcfs]
 [&amp;lt;ffffffffa12bb22d&amp;gt;] ldlm_completion_ast+0x4ed/0x960 [ptlrpc]
 [&amp;lt;ffffffffa12b6950&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x390 [ptlrpc]
 [&amp;lt;ffffffff8105fa40&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa12ba968&amp;gt;] ldlm_cli_enqueue_local+0x1f8/0x5d0 [ptlrpc]
 [&amp;lt;ffffffffa12bad40&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
 [&amp;lt;ffffffffa0981c60&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
 [&amp;lt;ffffffffa098492b&amp;gt;] mdt_object_lock0+0x33b/0xaf0 [mdt]
 [&amp;lt;ffffffffa0981c60&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
 [&amp;lt;ffffffffa12bad40&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
 [&amp;lt;ffffffffa09851a4&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
 [&amp;lt;ffffffffa0985211&amp;gt;] mdt_object_find_lock+0x61/0x170 [mdt]
 [&amp;lt;ffffffffa09a7671&amp;gt;] mdt_reint_setxattr+0x461/0x1850 [mdt]
 [&amp;lt;ffffffffa1164c70&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
 [&amp;lt;ffffffffa09811e5&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
 [&amp;lt;ffffffffa099bf8c&amp;gt;] ? mdt_root_squash+0x2c/0x410 [mdt]
 [&amp;lt;ffffffffa130fe96&amp;gt;] ? __req_capsule_get+0x166/0x700 [ptlrpc]
 [&amp;lt;ffffffffa09a0781&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
 [&amp;lt;ffffffffa0999de3&amp;gt;] mdt_reint_internal+0x4e3/0x7d0 [mdt]
 [&amp;lt;ffffffffa099a114&amp;gt;] mdt_reint+0x44/0xe0 [mdt]
 [&amp;lt;ffffffffa098b008&amp;gt;] mdt_handle_common+0x628/0x1620 [mdt]
 [&amp;lt;ffffffffa09c27d5&amp;gt;] mds_regular_handle+0x15/0x20 [mdt]
 [&amp;lt;ffffffffa12f402c&amp;gt;] ptlrpc_server_handle_request+0x41c/0xdf0 [ptlrpc]
 [&amp;lt;ffffffffa0fe45de&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
 [&amp;lt;ffffffffa12eb759&amp;gt;] ? ptlrpc_wait_event+0xa9/0x290 [ptlrpc]
 [&amp;lt;ffffffff81052223&amp;gt;] ? __wake_up+0x53/0x70
 [&amp;lt;ffffffffa12f5576&amp;gt;] ptlrpc_main+0xb76/0x1870 [ptlrpc]
 [&amp;lt;ffffffffa12f4a00&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
 [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffffa12f4a00&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
 [&amp;lt;ffffffffa12f4a00&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
 [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20

LustreError: dumping log to /tmp/lustre-log.1363243331.30243
LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 157s: evicting client at 0@lo  ns: mdt-ffff8800cd97e000 lock: ffff8800cdaa2600/0x3d633a3159cdb915 lrc: 3/0,0 mode: PR/PR res: 8589935616/1 bits 0x1b rrc: 3 type: IBT flags: 0x200000000020 nid: 0@lo remote: 0x3d633a3159cdb8f9 expref: 9 pid: 30243 timeout: 4297311737 lvb_type: 0
LNet: Service thread pid 30243 completed after 150.27s. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).
LustreError: 11-0: lustre-MDT0000-mdc-ffff88006b6db800: Communicating with 0@lo, operation obd_ping failed with -107.
Lustre: lustre-MDT0000-mdc-ffff88006b6db800: Connection to lustre-MDT0000 (at 0@lo) was lost; in progress operations using &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; service will wait &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; recovery to complete
LustreError: 167-0: lustre-MDT0000-mdc-ffff88006b6db800: This client was evicted by lustre-MDT0000; in progress operations using &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; service will fail.
Lustre: lustre-MDT0000-mdc-ffff88006b6db800: Connection restored to lustre-MDT0000 (at 0@lo)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="54055" author="keith" created="Thu, 14 Mar 2013 17:21:31 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@client tests]# bash llmount.sh 
Stopping clients: client /mnt/lustre (opts:)
Stopping clients: client /mnt/lustre2 (opts:)
Loading modules from /usr/lib64/lustre/tests/..
detected 1 online CPUs by sysfs
libcfs will create CPU partition based on online CPUs
debug=vfstrace rpctrace dlmtrace neterror ha config ioctl super
subsystem_debug=all -lnet -lnd -pinger
gss/krb5 is not supported
quota/lquota options: &apos;hash_lqs_cur_bits=3&apos;
Formatting mgs, mds, osts
Format mds1: /dev/sdb
Format ost1: /dev/sdb
Checking servers environments
Checking clients client environments
Loading modules from /usr/lib64/lustre/tests/..
detected 1 online CPUs by sysfs
libcfs will create CPU partition based on online CPUs
debug=vfstrace rpctrace dlmtrace neterror ha config ioctl super
subsystem_debug=all -lnet -lnd -pinger
gss/krb5 is not supported
Setup mgs, mdt, osts
Starting mds1:   /dev/sdb /mnt/mds1
Started lustre-MDT0000
Starting ost1:   /dev/sdb /mnt/ost1
Started lustre-OST0000
Starting client: client: -o user_xattr,flock mds@tcp:/lustre /mnt/lustre
Starting client client: -o user_xattr,flock mds@tcp:/lustre /mnt/lustre
Started clients client: 
mds@tcp:/lustre on /mnt/lustre type lustre (rw,user_xattr,flock)
Using TIMEOUT=20
enable jobstats, set job scheduler as procname_uid
Waiting 90 secs for update
Updated after 4s: wanted &apos;procname_uid&apos; got &apos;procname_uid&apos;
disable quota as required
[root@client tests]#  cd /mnt/lustre
[root@client lustre]# touch /mnt/lustre/file
[root@client lustre]# lctl set_param fail_loc=0x8000030c
fail_loc=0x8000030c
[root@client lustre]# setfattr -n user.attr -v value /mnt/lustre/file
[root@client lustre]# dmesg 
&amp;lt;snip&amp;gt;
Lustre: Mounted lustre-client
Lustre: DEBUG MARKER: Using TIMEOUT=20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I have waited way past 157s (well over 15 min) and there is no change to dmesg. This is the same behavior I saw before. &lt;/p&gt;

&lt;p&gt;My version is Lustre: Build Version: 2.3.62--PRISTINE-2.6.32-279.19.1.el6_lustre.x86_64. It is a master build from last week; it is past 9fb5d1ff837880a7596d98f24ff6bb1bf0033f2b by a good bit. &lt;/p&gt;

&lt;p&gt;Any more ideas for reproduction? I am inclined to think this is not a generic Lustre Master issue. &lt;/p&gt;</comment>
                            <comment id="54080" author="green" created="Fri, 15 Mar 2013 01:46:50 +0000"  >&lt;p&gt;Keith, I don&apos;t see the mark for the fail injection to hit in your logs?&lt;br/&gt;
The &quot;Lustre: *** cfs_fail_loc=30c, val=2147483648***&quot; message?&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Starting client: client: -o user_xattr,flock mds@tcp:/lustre /mnt/lustre
...
[root@client lustre]# lctl set_param fail_loc=0x8000030c
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;What is this mds node that you mount? The fail_loc needs to be injected on the mds node, and you seem to be running on a client? (unless mds is just a fancy alias for client in your setup)&lt;/p&gt;</comment>
                            <comment id="54088" author="panda" created="Fri, 15 Mar 2013 02:38:10 +0000"  >&lt;p&gt;I&apos;ve just checked out the latest version: 20baff1f7f77b02f629e5f7980807ad151423b3b.&lt;br/&gt;
The issue is still hit.&lt;/p&gt;

&lt;p&gt;And I&apos;m sorry if my explanation on fail_loc was not detailed enough.&lt;br/&gt;
Just as Oleg said, the fail_loc needs to be set on the MDS. Actually, llmount without additional parameters sets up a single-node configuration, so I thought I did not have to mention that.&lt;/p&gt;</comment>
                            <comment id="54129" author="keith" created="Fri, 15 Mar 2013 14:43:42 +0000"  >&lt;p&gt;I learn something every day &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;  I do manage 3 small VMS. I haven&apos;t use fail_loc outside of the test framework context thanks for the info.  My inital attemps to repro used the testframework so I think the fail_loc was set properly there. &lt;/p&gt;


&lt;p&gt;Ok so a repeat of the test setting the fail_loc on the mds. &lt;/p&gt;

&lt;p&gt;From the mds &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: Using TIMEOUT=20
Lustre: ctl-lustre-MDT0000: super-sequence allocation rc = 0 [0x0000000200000400-0x0000000240000400):0:mdt
Lustre: *** cfs_fail_loc=30c, val=2147483648***
LustreError: 1304:0:(ldlm_lib.c:2422:target_send_reply_msg()) @@@ dropping reply  req@ffff880034a99000 x1429516885090712/t0(0) o101-&amp;gt;6ea10d6d-cb72-918b-ed2d-ac3bfeae442f@192.168.56.101@tcp:0/0 lens 576/568 e 0 to 0 dl 1363372217 ref 1 fl Interpret:/0/0 rc 0/0
Lustre: lustre-MDT0000: Client 6ea10d6d-cb72-918b-ed2d-ac3bfeae442f (at 192.168.56.101@tcp) reconnecting
[root@mds ~]#
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;from the client&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;ustre: Mounted lustre-client
Lustre: DEBUG MARKER: Using TIMEOUT=20
Lustre: 3056:0:(client.c:1866:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1363372217/real 1363372217]  req@ffff88003a3d7800 x1429516885090712/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff88003b0bc000@192.168.56.103@tcp:12/10 lens 576/1080 e 0 to 1 dl 1363372224 ref 2 fl Rpc:X/0/ffffffff rc 0/-1
Lustre: lustre-MDT0000-mdc-ffff88003b0bc000: Connection to lustre-MDT0000 (at 192.168.56.103@tcp) was lost; in progress operations using this service will wait for recovery to complete
Lustre: lustre-MDT0000-mdc-ffff88003b0bc000: Connection restored to lustre-MDT0000 (at 192.168.56.103@tcp)
[root@client mnt]#
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The &quot;setfattr -n user.attr -v value /mnt/lustre/file&quot; hung for a short time, 10-20 seconds maybe. &lt;/p&gt;
</comment>
                            <comment id="54131" author="keith" created="Fri, 15 Mar 2013 15:05:34 +0000"  >&lt;p&gt;All right I rebooted everything and did it again and this time I got it. &lt;/p&gt;

&lt;p&gt;from the mds: &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: Lustre: Build Version: 2.3.62--PRISTINE-2.6.32-279.19.1.el6_lustre.x86_64
LDISKFS-fs (sdb): mounted filesystem with ordered data mode. quota=on. Opts: 
LDISKFS-fs (sdb): mounted filesystem with ordered data mode. quota=on. Opts: 
Lustre: Setting parameter lustre-MDT0000-mdtlov.lov.stripesize in log lustre-MDT0000
Lustre: ctl-lustre-MDT0000: No data found on store. Initialize space
Lustre: lustre-MDT0000: new disk, initializing
Lustre: 1413:0:(mdt_lproc.c:380:lprocfs_wr_identity_upcall()) lustre-MDT0000: identity upcall set to /usr/sbin/l_getidentity
LustreError: 11-0: lustre-MDT0000-lwp-MDT0000: Communicating with 0@lo, operation mds_connect failed with -11.
Lustre: DEBUG MARKER: Using TIMEOUT=20
Lustre: ctl-lustre-MDT0000: super-sequence allocation rc = 0 [0x0000000200000400-0x0000000240000400):0:mdt
Lustre: *** cfs_fail_loc=30c, val=2147483648***
LustreError: 1390:0:(ldlm_lib.c:2422:target_send_reply_msg()) @@@ dropping reply  req@ffff88003453d000 x1429600593772960/t0(0) o101-&amp;gt;27ba2a8e-a9c1-f2e8-0c49-e7d56aa2868c@192.168.56.101@tcp:0/0 lens 576/624 e 0 to 0 dl 1363373869 ref 1 fl Interpret:/0/0 rc 0/0
Lustre: lustre-MDT0000: Client 27ba2a8e-a9c1-f2e8-0c49-e7d56aa2868c (at 192.168.56.101@tcp) reconnecting
LNet: Service thread pid 1390 was inactive for 40.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
Pid: 1390, comm: mdt00_001

Call Trace:
 [&amp;lt;ffffffff814ead12&amp;gt;] schedule_timeout+0x192/0x2e0
 [&amp;lt;ffffffff8107cb50&amp;gt;] ? process_timeout+0x0/0x10
 [&amp;lt;ffffffffa03376d1&amp;gt;] cfs_waitq_timedwait+0x11/0x20 [libcfs]
 [&amp;lt;ffffffffa065622d&amp;gt;] ldlm_completion_ast+0x4ed/0x960 [ptlrpc]
 [&amp;lt;ffffffffa0651950&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x390 [ptlrpc]
 [&amp;lt;ffffffff8105fa40&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa0655968&amp;gt;] ldlm_cli_enqueue_local+0x1f8/0x5d0 [ptlrpc]
 [&amp;lt;ffffffffa0655d40&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
 [&amp;lt;ffffffffa0d1cc60&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
 [&amp;lt;ffffffffa0d1f92b&amp;gt;] mdt_object_lock0+0x33b/0xaf0 [mdt]
 [&amp;lt;ffffffffa0d1cc60&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
 [&amp;lt;ffffffffa0655d40&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
 [&amp;lt;ffffffffa0d201a4&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
 [&amp;lt;ffffffffa0d20211&amp;gt;] mdt_object_find_lock+0x61/0x170 [mdt]
 [&amp;lt;ffffffffa0d42671&amp;gt;] mdt_reint_setxattr+0x461/0x1850 [mdt]
 [&amp;lt;ffffffffa04fcc80&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
 [&amp;lt;ffffffffa0d1c1e5&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
 [&amp;lt;ffffffffa0d36f8c&amp;gt;] ? mdt_root_squash+0x2c/0x410 [mdt]
 [&amp;lt;ffffffffa06aaeb6&amp;gt;] ? __req_capsule_get+0x166/0x700 [ptlrpc]
 [&amp;lt;ffffffffa0d3b781&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
 [&amp;lt;ffffffffa0d34de3&amp;gt;] mdt_reint_internal+0x4e3/0x7d0 [mdt]
 [&amp;lt;ffffffffa0d35114&amp;gt;] mdt_reint+0x44/0xe0 [mdt]
 [&amp;lt;ffffffffa0d26008&amp;gt;] mdt_handle_common+0x628/0x1620 [mdt]
 [&amp;lt;ffffffffa0d5e6e5&amp;gt;] mds_regular_handle+0x15/0x20 [mdt]
 [&amp;lt;ffffffffa068f04c&amp;gt;] ptlrpc_server_handle_request+0x41c/0xdf0 [ptlrpc]
 [&amp;lt;ffffffffa03375de&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
 [&amp;lt;ffffffffa0686799&amp;gt;] ? ptlrpc_wait_event+0xa9/0x290 [ptlrpc]
 [&amp;lt;ffffffff81052223&amp;gt;] ? __wake_up+0x53/0x70
 [&amp;lt;ffffffffa0690596&amp;gt;] ptlrpc_main+0xb76/0x1870 [ptlrpc]
 [&amp;lt;ffffffffa068fa20&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
 [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffffa068fa20&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
 [&amp;lt;ffffffffa068fa20&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
 [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20

LustreError: dumping log to /tmp/lustre-log.1363373910.1390
LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 157s: evicting client at 192.168.56.101@tcp  ns: mdt-ffff88003acdf000 lock: ffff88003cfb3000/0xd91a117c53c742cb lrc: 3/0,0 mode: PR/PR res: 8589935616/2 bits 0x1b rrc: 3 type: IBT flags: 0x200000000020 nid: 192.168.56.101@tcp remote: 0xedc553c6f6f67ee9 expref: 8 pid: 1390 timeout: 4295563571 lvb_type: 0
LNet: Service thread pid 1390 completed after 150.43s. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;p&gt;Would you mind making a Lustre test for this issue? &lt;/p&gt;</comment>
                            <comment id="54298" author="green" created="Mon, 18 Mar 2013 20:01:03 +0000"  >&lt;p&gt;Keith, since you are able to reproduce it now, I think it&apos;s trivial for you to create the test.&lt;br/&gt;
In fact it would be great if you can just fix the existign test that tests for this problem (and is broken for ages as it appears because of teh change in locking we depended on to trigger it) and fix it.&lt;/p&gt;</comment>
                            <comment id="54314" author="keith" created="Mon, 18 Mar 2013 22:50:42 +0000"  >&lt;p&gt;Oleg which test tests for this presently? I can can take care of the test. &lt;/p&gt;


&lt;p&gt;Is there a clear path forward to fix the issue?&lt;/p&gt;</comment>
                            <comment id="54437" author="keith" created="Wed, 20 Mar 2013 01:12:26 +0000"  >&lt;p&gt;Well a test not working at all or it just does not fail much.&lt;/p&gt;

&lt;p&gt;This is with a 10 min wait from the MDS dmesg: &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: Using TIMEOUT=20
Lustre: DEBUG MARKER: == replay-single test 91: Find proper lock in hash LU-2827 == 17:06:09 (1363737969)
Lustre: *** cfs_fail_loc=30c, val=2147483648***
LustreError: 3835:0:(ldlm_lib.c:2422:target_send_reply_msg()) @@@ dropping reply  req@ffff880034928800 x1429982887805068/t0(0) o101-&amp;gt;ac9ba92b-1fcc-8ab8-fe18-f7a10315f583@192.168.56.101@tcp:0/0 lens 576/568 e 0 to 0 dl 1363737965 ref 1 fl Interpret:/0/0 rc 0/0
Lustre: lustre-MDT0000: Client ac9ba92b-1fcc-8ab8-fe18-f7a10315f583 (at 192.168.56.101@tcp) reconnecting
Lustre: DEBUG MARKER: == replay-single test complete, duration 631 sec == 17:16:22 (1363738582)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The fail_loc is set on the mds, but we don&apos;t seem to trigger the hang.  &lt;/p&gt;


&lt;p&gt;This is the test that is not working. &lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;#LU-2827 mdt_intent_fixup_resent need to find the proper lock in hash
test_91() {
        touch $DIR/$tname
        #define OBD_FAIL_LDLM_REPLY              0x30c
        do_facet $SINGLEMDS &quot;lctl set_param fail_loc=0x8000030c&quot;
        setfattr -n user.attr -v value $DIR/$tname
        sleep 600
        #check the state of the system 
        sanity_mount_check ||
                error &quot;Client has been evicted&quot;
        do_facet $SINGLEMDS &quot;lctl set_param fail_loc=0x0&quot;
}
run_test 91 &quot;Find proper lock in hash LU-2827&quot;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I tried it by hand a few times (including a fresh reboot) exactly the same way I triggered it before and did not encounter the error.  I see the client  reconnecting quickly and no eviction. &lt;/p&gt;

&lt;p&gt;Andrew, you seem to reproduce this quite easily.  How many attempts does it take you to trigger it?  What should the timeout be from the setfattr to the mount check?  My initial thought was 180s, but I escalated it thinking I was not waiting long enough. &lt;/p&gt;

&lt;p&gt;I am going to run a 5 min wait loop overnight to see if I can catch the error again. &lt;/p&gt;
</comment>
                            <comment id="54450" author="panda" created="Wed, 20 Mar 2013 09:40:08 +0000"  >&lt;p&gt;Hello Keith!&lt;/p&gt;

&lt;p&gt;The issue has been reproducing very easily for me. Actually, I had to try a lot to make the test not work.&lt;br/&gt;
However, it seems there is a race condition that can be hard to reproduce with your multi-node configuration.&lt;/p&gt;

&lt;p&gt;This is how it works for me.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;00010000:00010000:1.0:1363770071.095970:0:2835:0:(ldlm_lock.c:631:ldlm_add_bl_work_item()) ### lock incompatible; sending blocking AST. ns: mdt-ffff8801281dd000 lock: ffff8800c85
d8480/0x37eadca6bd308e8b lrc: 2/0,0 mode: PR/PR res: 8589935619/4 bits 0x3 rrc: 3 type: IBT flags: 0x4000000 nid: 0@lo remote: 0x37eadca6bd308e6f expref: 8 pid: 3330 timeout: 0
=============
00010000:00010000:1.0:1363770071.095987:0:2835:0:(ldlm_lock.c:631:ldlm_add_bl_work_item()) ### lock incompatible; sending blocking AST. ns: mdt-ffff8801281dd000 lock: ffff8800c85
d8a80/0x37eadca6bd308eae lrc: 2/0,0 mode: PR/PR res: 8589935619/4 bits 0x3 rrc: 3 type: IBT flags: 0x4000000 nid: 0@lo remote: 0x37eadca6bd308e6f expref: 8 pid: 2835 timeout: 0
00010000:00000001:1.0:1363770071.096007:0:2835:0:(ldlm_lock.c:666:ldlm_add_ast_work_item()) &lt;span class=&quot;code-object&quot;&gt;Process&lt;/span&gt; leaving
=============
00010000:00010000:1.0:1363770071.096129:0:2835:0:(ldlm_lockd.c:834:ldlm_server_blocking_ast()) ### server preparing blocking AST ns: mdt-ffff8801281dd000 lock: ffff8800c85d8a80/0x37eadca6bd308eae lrc: 3/0,0 mode: PR/PR res: 8589935619/4 bits 0x3 rrc: 3 type: IBT flags: 0x4000020 nid: 0@lo remote: 0x37eadca6bd308e6f expref: 8 pid: 2835 timeout: 0
00010000:00010000:1.0:1363770071.096144:0:2835:0:(ldlm_lockd.c:487:ldlm_add_waiting_lock()) ### adding to wait list(timeout: 150, AT: on) ns: mdt-ffff8801281dd000 lock: ffff8800c85d8a80/0x37eadca6bd308eae lrc: 4/0,0 mode: PR/PR res: 8589935619/4 bits 0x3 rrc: 3 type: IBT flags: 0x4000020 nid: 0@lo remote: 0x37eadca6bd308e6f expref: 8 pid: 2835 timeout: 4299631388

00010000:00010000:1.0:1363770071.096482:0:2835:0:(ldlm_lockd.c:834:ldlm_server_blocking_ast()) ### server preparing blocking AST ns: mdt-ffff8801281dd000 lock: ffff8800c85d8480/0x37eadca6bd308e8b lrc: 3/0,0 mode: PR/PR res: 8589935619/4 bits 0x3 rrc: 3 type: IBT flags: 0x4000020 nid: 0@lo remote: 0x37eadca6bd308e6f expref: 8 pid: 3330 timeout: 0
00010000:00010000:1.0:1363770071.096499:0:2835:0:(ldlm_lockd.c:487:ldlm_add_waiting_lock()) ### adding to wait list(timeout: 150, AT: on) ns: mdt-ffff8801281dd000 lock: ffff8800c85d8480/0x37eadca6bd308e8b lrc: 4/0,0 mode: PR/PR res: 8589935619/4 bits 0x3 rrc: 3 type: IBT flags: 0x4000020 nid: 0@lo remote: 0x37eadca6bd308e6f expref: 8 pid: 3330 timeout: 4299631389

==============
00010000:00010000:0.0:1363770071.097126:0:2972:0:(ldlm_lockd.c:1524:ldlm_handle_bl_callback()) ### client blocking AST callback handler ns: lustre-MDT0000-mdc-ffff8800cdb54c00 lock: ffff8800cd89abc0/0x37eadca6bd308e6f lrc: 2/0,0 mode: PR/PR res: 8589935619/4 bits 0x3 rrc: 1 type: IBT flags: 0x10000000 nid: local remote: 0x37eadca6bd308eae expref: -99 pid: 3430 timeout: 0
00010000:00010000:1.0:1363770071.097574:0:3217:0:(ldlm_lockd.c:1524:ldlm_handle_bl_callback()) ### client blocking AST callback handler ns: lustre-MDT0000-mdc-ffff8800cdb54c00 lock: ffff8800cd89abc0/0x37eadca6bd308e6f lrc: 3/0,0 mode: --/PR res: 8589935619/4 bits 0x3 rrc: 1 type: IBT flags: 0x30002090 nid: local remote: 0x37eadca6bd308eae expref: -99 pid: 3430 timeout: 0
00010000:00010000:1.0:1363770071.097614:0:3217:0:(ldlm_request.c:1251:ldlm_cli_cancel()) ### lock is already being destroyed &amp;lt;==================== no reply sent?
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;However, if the two cancels are processed sequentially rather than in parallel, ldlm_callback_handler() will return -EINVAL:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        lock = ldlm_handle2lock_long(&amp;amp;dlm_req-&amp;gt;lock_handle[0], 0);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!lock) {
                CDEBUG(D_DLMTRACE, &lt;span class=&quot;code-quote&quot;&gt;&quot;callback on lock &quot;&lt;/span&gt;LPX64&lt;span class=&quot;code-quote&quot;&gt;&quot; - lock &quot;&lt;/span&gt;
                       &lt;span class=&quot;code-quote&quot;&gt;&quot;disappeared\n&quot;&lt;/span&gt;, dlm_req-&amp;gt;lock_handle[0].cookie);
                rc = ldlm_callback_reply(req, -EINVAL);
                ldlm_callback_errmsg(req, &lt;span class=&quot;code-quote&quot;&gt;&quot;Operate with invalid parameter&quot;&lt;/span&gt;, rc,
                                     &amp;amp;dlm_req-&amp;gt;lock_handle[0]);
                RETURN(0);
        }

        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; ((lock-&amp;gt;l_flags &amp;amp; LDLM_FL_FAIL_LOC) &amp;amp;&amp;amp;
            lustre_msg_get_opc(req-&amp;gt;rq_reqmsg) == LDLM_BL_CALLBACK)
                OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);

        &lt;span class=&quot;code-comment&quot;&gt;/* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */&lt;/span&gt;
        lock_res_and_lock(lock);
        lock-&amp;gt;l_flags |= ldlm_flags_from_wire(dlm_req-&amp;gt;lock_flags &amp;amp;
                                              LDLM_AST_FLAGS);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (lustre_msg_get_opc(req-&amp;gt;rq_reqmsg) == LDLM_BL_CALLBACK) {
                /* If somebody cancels lock and cache is already dropped,
                 * or lock is failed before cp_ast received on client,
                 * we can tell the server we have no lock. Otherwise, we
                 * should send cancel after dropping the cache. */
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (((lock-&amp;gt;l_flags &amp;amp; LDLM_FL_CANCELING) &amp;amp;&amp;amp;
                    (lock-&amp;gt;l_flags &amp;amp; LDLM_FL_BL_DONE)) ||
                    (lock-&amp;gt;l_flags &amp;amp; LDLM_FL_FAILED)) {
                        LDLM_DEBUG(lock, &lt;span class=&quot;code-quote&quot;&gt;&quot;callback on lock &quot;&lt;/span&gt;
                                   LPX64&lt;span class=&quot;code-quote&quot;&gt;&quot; - lock disappeared\n&quot;&lt;/span&gt;,
                                   dlm_req-&amp;gt;lock_handle[0].cookie);
                        unlock_res_and_lock(lock);
                        LDLM_LOCK_RELEASE(lock);
                        rc = ldlm_callback_reply(req, -EINVAL);
                        ldlm_callback_errmsg(req, &lt;span class=&quot;code-quote&quot;&gt;&quot;Operate on stale lock&quot;&lt;/span&gt;, rc,
                                             &amp;amp;dlm_req-&amp;gt;lock_handle[0]);
                        RETURN(0);
                }
                /* BL_AST locks are not needed in LRU.
                 * Let ldlm_cancel_lru() be fast. */
                ldlm_lock_remove_from_lru(lock);
                lock-&amp;gt;l_flags |= LDLM_FL_BL_AST;
        }
        unlock_res_and_lock(lock);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Perhaps adding an OBD_RACE barrier right before the ldlm_handle2lock_long() call might make the issue more easily reproducible.&lt;/p&gt;
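
&lt;p&gt;Concretely, a minimal sketch of that barrier (the fail_loc name and value here are hypothetical, not existing ones):&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;/* Hypothetical fail_loc; an unused value would have to be picked. */
#define OBD_FAIL_LDLM_BL_RACE 0x31f

        /* Make the two callback threads rendezvous before the handle lookup,
         * so the &quot;lock is already being destroyed&quot; path is hit reliably. */
        OBD_RACE(OBD_FAIL_LDLM_BL_RACE);
        lock = ldlm_handle2lock_long(&amp;amp;dlm_req-&amp;gt;lock_handle[0], 0);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>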
                            <comment id="54451" author="panda" created="Wed, 20 Mar 2013 10:00:12 +0000"  >&lt;p&gt;By the way, sleep 600 is not required by your test. And could you, please, make sure that sanity_mount_check() really checks if there was an eviction?&lt;/p&gt;</comment>
                            <comment id="54476" author="keith" created="Wed, 20 Mar 2013 16:27:52 +0000"  >&lt;p&gt;I have yet to repro the issue with the test.  I will be sure sanity_mount_check  or some other check is the right thing to catch eviction. &lt;/p&gt;

&lt;p&gt;Perhaps I will work on this in a single-VM environment, as overnight testing didn&apos;t get me anything.  The one time I triggered it (documented up above) was right from a fresh reboot.&lt;/p&gt;
</comment>
                            <comment id="54542" author="keith" created="Thu, 21 Mar 2013 07:44:44 +0000"  >&lt;p&gt;I put the beta test here &lt;a href=&quot;http://review.whamcloud.com/5795&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5795&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;A timeout is needed so that we can detect the error and take corrective action. &lt;br/&gt;
Likely when we detect the error we will need to recreate a working FS. &lt;/p&gt;

&lt;p&gt;I tried to recreate the issue in a single VM for a while today but I didn&apos;t get the error.&lt;/p&gt;

&lt;p&gt;Can you try the test to see if it fails for you? Perhaps you can help finish the test, as you seem to be able to readily reproduce the issue and I can&apos;t for some reason. &lt;/p&gt;
</comment>
                            <comment id="54821" author="green" created="Tue, 26 Mar 2013 05:41:56 +0000"  >&lt;p&gt;Keith, the problem you have is your tet creates the double lock but does not really try to revoke it.&lt;br/&gt;
Now, if you do revocation (I have some comments in the patch), the problem is clearly seen, but in a somewhat unexpected way.&lt;br/&gt;
You might want to run it with debug logs and check in the debug log that the second lock got created on resend and then find out how can you cancel that lock - this will trigger lock timeout and client eviction.&lt;/p&gt;

&lt;p&gt;The previous test for this was replay-single test 52 (I think; I still cannot find the original report I dug up way back), but now stat does not cancel the lock at all (not that it ever did, I guess).&lt;/p&gt;

&lt;p&gt;Potentially the mds intent lock is ill-suited for this test now; instead we might want a test that drops the lock reply on the ost, so the resend creates another lock, and then we cause a cancel (e.g. by unlinking the file or doing a stat from another client; it is important to do it from another client to avoid ELC cancelling our copy of the lock beforehand).&lt;/p&gt;</comment>
                            <comment id="55066" author="keith" created="Fri, 29 Mar 2013 02:41:13 +0000"  >&lt;p&gt;replay single test 52 stemmed from &lt;a href=&quot;https://projectlava.xyratex.com/show_bug.cgi?id=3764&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://projectlava.xyratex.com/show_bug.cgi?id=3764&lt;/a&gt; (circa 2004)&lt;/p&gt;

&lt;p&gt;I have started working on a two-client test; I hope to have code to share tomorrow. &lt;/p&gt;</comment>
                            <comment id="55248" author="keith" created="Tue, 2 Apr 2013 03:31:53 +0000"  >&lt;p&gt;Well spent good cycles trying to created the issue with 2 clients and VMs for the MDT/MDS and OSS/OST. I tried all sort of operations.  In general the first operations would block for a few seconds the things work like normal.  The fail_loc setting only really seems change the behavior on initial operations.   I did not encounter the issue today at all. &lt;/p&gt;

&lt;p&gt;I spent some time trying to sort out how to drop locks on the OST, but I can&apos;t find any examples of this. Any hints?&lt;/p&gt;
</comment>
                            <comment id="55254" author="green" created="Tue, 2 Apr 2013 05:23:05 +0000"  >&lt;p&gt;To for a lock drop, do some conflicting operation.&lt;br/&gt;
Example:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;touch /mnt/lustre/file # creates file
ls -l /mnt/lustre/file # obtains read locks on ost objects of the file
echo aaa &amp;gt;/mnt/lustre/file # attempts to write - cancels read lock in process (unlink would &lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt; too), but due to ELC client does it all by itself
# now, assuming /mnt/lustre2 is same lustre fs on different mountpoint
echo aaa &amp;gt;&amp;gt;/mnt/lustre2/file # &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; actually forces ost to cancel the lock given out from a previous operation.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="55256" author="green" created="Tue, 2 Apr 2013 05:27:04 +0000"  >&lt;p&gt;Actually, looking at the title of this ticket (duh), this is mdt_intent_fixup() bug, so it&apos;s mds only, as such, its best to concentrate on mds operations after all, like setxattr.&lt;/p&gt;

&lt;p&gt;So the example from above would probably be: touch somefile ; ls -l somefile ; rm -rf othermountpoint/somefile&lt;/p&gt;

&lt;p&gt;with the reply drop around the ls -l.&lt;/p&gt;
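
&lt;p&gt;As a rough test-framework sketch of that sequence (assuming the usual two mounts $DIR and $DIR2, and the 0x30c reply-drop fail_loc used earlier in this ticket):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;touch $DIR/$tfile
#define OBD_FAIL_LDLM_REPLY 0x30c
do_facet $SINGLEMDS &quot;lctl set_param fail_loc=0x8000030c&quot;
ls -l $DIR/$tfile        # reply dropped; the client resends the intent lock
do_facet $SINGLEMDS &quot;lctl set_param fail_loc=0x0&quot;
rm -f $DIR2/$tfile       # conflicting unlink from the other mount forces a cancel
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>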
                            <comment id="55334" author="panda" created="Tue, 2 Apr 2013 21:14:49 +0000"  >&lt;p&gt;Alexey, Vitaly and I have discussed a possible solution. I&apos;m coding it at the moment.&lt;/p&gt;</comment>
                            <comment id="55497" author="mdiep" created="Thu, 4 Apr 2013 16:31:23 +0000"  >&lt;p&gt;Panda,&lt;/p&gt;

&lt;p&gt;I have reproduced this in a single-node cluster but not on multiple nodes. Have you ever tried to reproduce this on a multi-node cluster? If not, could you try that?&lt;/p&gt;</comment>
                            <comment id="55805" author="panda" created="Mon, 8 Apr 2013 22:04:56 +0000"  >&lt;p&gt;Minh, I haven&apos;t tried to reproduce with multi-node configurations. But, as I wrote in &lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-2827?focusedCommentId=54450&amp;amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-54450&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-2827?focusedCommentId=54450&amp;amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-54450&lt;/a&gt;, it seems clear that without special fault injection code it might be hard to reproduce the issue on a multi-node setup.&lt;/p&gt;

&lt;p&gt;I&apos;ve posted a prototype patch for discussion/review: &lt;a href=&quot;http://review.whamcloud.com/5978&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5978&lt;/a&gt; (it does not include a test case).&lt;/p&gt;</comment>
                            <comment id="55912" author="panda" created="Tue, 9 Apr 2013 17:51:12 +0000"  >&lt;p&gt;Xyratex-bug-id: MRP-975&lt;/p&gt;</comment>
                            <comment id="55995" author="panda" created="Wed, 10 Apr 2013 12:17:42 +0000"  >&lt;p&gt;A test case: &lt;a href=&quot;http://review.whamcloud.com/#change,6000&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,6000&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="57153" author="pjones" created="Fri, 26 Apr 2013 18:17:17 +0000"  >&lt;p&gt;Lowering priority. This patch is not ready and this limitation has been present in earlier version of Lustre&lt;/p&gt;</comment>
                            <comment id="57256" author="panda" created="Mon, 29 Apr 2013 19:35:35 +0000"  >&lt;p&gt;There&apos;s no new patch at the moment. We will be discussing a different solution than was coded in the gerrit patches.&lt;/p&gt;

&lt;p&gt;If Whamcloud is going to code its own patch, please let us know.&lt;/p&gt;</comment>
                            <comment id="82837" author="bfaccini" created="Wed, 30 Apr 2014 11:49:08 +0000"  >&lt;p&gt;Since both tickets (and their patches!) are highly related, I tried running the reproducer of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4584&quot; title=&quot;Lock revocation process fails consistently&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4584&quot;&gt;&lt;del&gt;LU-4584&lt;/del&gt;&lt;/a&gt; against latest build #23312 from this ticket/Gerrit-change &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt;/#5978-8. But the MDS just hit the LASSERT(lock-&amp;gt;l_granted_mode != lock-&amp;gt;l_req_mode) that the change introduced in ldlm_process_inodebits_lock().&lt;br/&gt;
The platform I used was made of one MDS, 2 OSSs with a total of 154 OSTs, and full default striping...&lt;/p&gt;
</comment>
                            <comment id="83103" author="simmonsja" created="Fri, 2 May 2014 20:59:55 +0000"  >&lt;p&gt;I also see this patch does NOT apply well to the b2_5 branch.&lt;/p&gt;</comment>
                            <comment id="83282" author="bfaccini" created="Tue, 6 May 2014 12:13:46 +0000"  >&lt;p&gt;I just had a look into the MDS crash-dump I got when running the reproducer of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4584&quot; title=&quot;Lock revocation process fails consistently&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4584&quot;&gt;&lt;del&gt;LU-4584&lt;/del&gt;&lt;/a&gt; against latest build #23312 from this ticket/Gerrit-change &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt;/#5978-8, and due to the LBUG/LASSERT(lock-&amp;gt;l_granted_mode != lock-&amp;gt;l_req_mode) that the change introduced in ldlm_process_inodebits_lock(). It looks like it does not handle the scenario of a layout-lock reply that is truncated due to the size of layout exceeding the reply buffer on Client side, causing the Client to resend the request. Because in this particular case, the ldlm_lock is already granted, so I wonder why this LASSERT has been introduced starting with patch-set #6 of Change #5978 ??&lt;/p&gt;</comment>
                            <comment id="83364" author="simmonsja" created="Tue, 6 May 2014 21:30:29 +0000"  >&lt;p&gt;I just tried this patch with the LASSERT removed. For my simul run I see the following:&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.866214&amp;#93;&lt;/span&gt; INFO: task mdt03_000:14714 blocked for more than 120 seconds.&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.866546&amp;#93;&lt;/span&gt;       Tainted: P           ---------------    2.6.32-431.11.2.el6.wc.x86_64 #1&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.867007&amp;#93;&lt;/span&gt; &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.867482&amp;#93;&lt;/span&gt; mdt03_000     D 000000000000001e     0 14714      2 0x00000000&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.867840&amp;#93;&lt;/span&gt;  ffff8807d77a58c8 0000000000000046 0000000000000000 ffffffffa0f52503&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.868319&amp;#93;&lt;/span&gt;  ffff8807b7b8f190 ffff8807b7b8f138 ffff881071a8b538 ffffffffa0f52503&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.868786&amp;#93;&lt;/span&gt;  ffff8807d77a3af8 ffff8807d77a5fd8 000000000000fbc8 ffff8807d77a3af8&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.869267&amp;#93;&lt;/span&gt; Call Trace:&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.869440&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8152aa15&amp;gt;&amp;#93;&lt;/span&gt; rwsem_down_failed_common+0x95/0x1d0&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.869773&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d9c32b&amp;gt;&amp;#93;&lt;/span&gt; ? ldiskfs_xattr_trusted_get+0x2b/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;ldiskfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.870218&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811ae457&amp;gt;&amp;#93;&lt;/span&gt; ? generic_getxattr+0x87/0x90&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.870516&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8152aba6&amp;gt;&amp;#93;&lt;/span&gt; rwsem_down_read_failed+0x26/0x30&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.870838&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0fdf073&amp;gt;&amp;#93;&lt;/span&gt; ? lod_xattr_get+0x153/0x420 &lt;span class=&quot;error&quot;&gt;&amp;#91;lod&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.871168&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8128eef4&amp;gt;&amp;#93;&lt;/span&gt; call_rwsem_down_read_failed+0x14/0x30&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.871495&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8152a0a4&amp;gt;&amp;#93;&lt;/span&gt; ? down_read+0x24/0x30&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.871786&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f1c4bd&amp;gt;&amp;#93;&lt;/span&gt; mdt_object_open_lock+0x1ed/0x9d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.872141&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0efe7ac&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_attr_get_complex+0x4ec/0x770 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.872486&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f24ae7&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_open+0x15b7/0x2150 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.872818&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05bdf86&amp;gt;&amp;#93;&lt;/span&gt; ? upcall_cache_get_entry+0x296/0x880 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.873292&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa074f780&amp;gt;&amp;#93;&lt;/span&gt; ? lu_ucred+0x20/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.873608&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f0d461&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_rec+0x41/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.859651&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ef2e63&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_internal+0x4c3/0x780 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.859989&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ef330e&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_reint+0x1ee/0x520 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.860347&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ef0bce&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_policy+0x3ae/0x770 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.860694&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa086a511&amp;gt;&amp;#93;&lt;/span&gt; ldlm_lock_enqueue+0x361/0x8c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.861059&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0893b1f&amp;gt;&amp;#93;&lt;/span&gt; ldlm_handle_enqueue0+0x51f/0x10f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.861443&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ef1096&amp;gt;&amp;#93;&lt;/span&gt; mdt_enqueue+0x46/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.861791&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ef5c5a&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x52a/0x1470 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.862134&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f32785&amp;gt;&amp;#93;&lt;/span&gt; mds_regular_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.862476&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08c3b85&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_server_handle_request+0x385/0xc00 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.862977&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05a24ce&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_timer_arm+0xe/0x10 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.863330&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05b33df&amp;gt;&amp;#93;&lt;/span&gt; ? lc_watchdog_touch+0x6f/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.863687&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08bb1d9&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_wait_event+0xa9/0x2d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.864077&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81054839&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up_common+0x59/0x90&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.864417&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08c4eed&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xaed/0x1920 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.864778&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08c4400&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1920 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.865127&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aee6&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.865390&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.865655&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ae50&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22441.865925&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22609.397953&amp;#93;&lt;/span&gt; Lustre: 14705:0:(service.c:1341:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/5), not sending early reply&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22609.397955&amp;#93;&lt;/span&gt;   req@ffff8807ca512800 x1467386901306824/t0(0) o101-&amp;gt;daaf34f5-eb04-8d8f-f1f6-350b15d96039@2@gni1:0/0 lens 656/3512 e 12 to 0 dl 1399410998 ref 2 fl Interpret:/0/0 rc 0/0&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22609.619944&amp;#93;&lt;/span&gt; Lustre: 15089:0:(service.c:1341:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/5), not sending early reply&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;22609.619946&amp;#93;&lt;/span&gt;   req@ffff8807d548d800 x1467386902355424/t0(0) o101-&amp;gt;a076348e-1b45-8004-2751-8404ca9d2c3f@28@gni1:0/0 lens 656/3512 e 12 to 0 dl 1399410998 ref 2 fl Interpret:/0/0 rc 0/0&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;23019.472325&amp;#93;&lt;/span&gt; Lustre: sultan-MDT0000: Client a076348e-1b45-8004-2751-8404ca9d2c3f (at 28@gni1) reconnecting&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;23019.476010&amp;#93;&lt;/span&gt; Lustre: sultan-MDT0000: Client 8ba0393c-b95b-703f-68d0-65583606e0ff (at 3@gni1) reconnecting&lt;/p&gt;</comment>
                            <comment id="83474" author="green" created="Thu, 8 May 2014 04:27:17 +0000"  >&lt;p&gt;Bruno: The assert was introduced specifically to catch this case.&lt;/p&gt;

&lt;p&gt;Basically, what we are trying to catch is this: the Server has processed a request and returned the reply to the Client, but the Client cannot accept it because its buffer is too small, so it resends.&lt;br/&gt;
The Server then sees that this is a resend and finds the old, already-granted lock. The assertion is there to ensure that this lock is NOT granted a second time (it is already granted). So if it fires for you, it appears some path is left uncovered that should simply reuse the old lock rather than retry the grant.&lt;br/&gt;
So if you can outline how this happens so that Vitaly can update the patch, that would be great. Thanks&lt;/p&gt;
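
&lt;p&gt;For illustration, here is a minimal sketch of that reuse rule, assuming the remote-handle lookup discussed in this ticket (an illustration only, not the actual patch):&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;/* Sketch only: the rule the new assertion enforces.  If the RPC is
 * marked MSG_RESENT, look the original lock up by its client-side
 * handle; when it is already granted, reuse it and skip the grant
 * path entirely. */
if (lustre_msg_get_flags(req-&amp;gt;rq_reqmsg) &amp;amp; MSG_RESENT) {
        lock = cfs_hash_lookup(req-&amp;gt;rq_export-&amp;gt;exp_lock_hash,
                               (void *)&amp;amp;remote_hdl);
        if (lock != NULL &amp;amp;&amp;amp;
            lock-&amp;gt;l_granted_mode == lock-&amp;gt;l_req_mode)
                /* already granted: resend the saved reply instead of
                 * enqueueing the lock a second time */
                RETURN(ELDLM_LOCK_REPLACED);
}
/* only a not-yet-granted lock may reach the grant path */
LASSERT(lock-&amp;gt;l_granted_mode != lock-&amp;gt;l_req_mode);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>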
                            <comment id="83701" author="bfaccini" created="Sat, 10 May 2014 09:06:23 +0000"  >&lt;p&gt;Oleg,&lt;br/&gt;
I thought I had already made it clear that &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4584&quot; title=&quot;Lock revocation process fails consistently&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4584&quot;&gt;&lt;del&gt;LU-4584&lt;/del&gt;&lt;/a&gt; contains all the information/reproducer/... that fully describe its specific scenario: a layout-lock reply from the Server is truncated because the reply buffer on the Client is too short for the layout/striping size, which causes the Client to resend the layout-lock request; the current bug then leads to a duplicated lock on the Server and later a Client eviction during the 1st lock blocking callback.&lt;/p&gt;

&lt;p&gt;BTW, it reproduces solidly on homogeneous b2_4/b2_5/master Server/Client platforms and, when running with this ticket/Gerrit-change &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt;/#5978-8, again triggers the LBUG/LASSERT(lock-&amp;gt;l_granted_mode != lock-&amp;gt;l_req_mode) that the change introduced in ldlm_process_inodebits_lock().&lt;/p&gt;</comment>
                            <comment id="84461" author="bfaccini" created="Tue, 20 May 2014 16:05:18 +0000"  >&lt;p&gt;I spent some time to reproduce the MDS LBUG//LASSERT(lock-&amp;gt;l_granted_mode != lock-&amp;gt;l_req_mode) I had when running the simple reproducer from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4584&quot; title=&quot;Lock revocation process fails consistently&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4584&quot;&gt;&lt;del&gt;LU-4584&lt;/del&gt;&lt;/a&gt; on a Lustre build with this ticket/Gerrit-change &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt;/#5978-&lt;span class=&quot;error&quot;&gt;&amp;#91;8,9&amp;#93;&lt;/span&gt; and here is what seems to happen :&lt;/p&gt;

&lt;p&gt;             _ in fact, even a simple dd creating a file with default but significant striping (I am using 154 OSTs), right after the file-system start/mount, triggers the problem ...&lt;/p&gt;

&lt;p&gt;             _ what I see in the full Client/MDS debug logs is that, as expected from the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4584&quot; title=&quot;Lock revocation process fails consistently&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4584&quot;&gt;&lt;del&gt;LU-4584&lt;/del&gt;&lt;/a&gt; scenario analysis, the Client resend occurs when a layout-lock (MDS_INODELOCK_LAYOUT) reply from the Server is truncated because the striping/layout size exceeds the reply buffer size:&lt;/p&gt;
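
&lt;p&gt;(As a quick cross-check of the buffer sizes, read directly from the logs below and nothing more: the Client first posts a 2048-byte reply buffer, but the LOV EA of this widely-striped file is 3728 bytes, so the full reply does not fit and is truncated; the Client then allocates an 8192-byte reply buffer and resends the same xid.)&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;1st request : repbuf 2048 bytes, lens 376/1544    (null_alloc_repbuf)
Server reply: lens 376/4072, LOV EA size 3728     (lod_xattr_get rc=3728)
Client      : truncate req ... rpc 1544 - 4040+192, then rc -75 (-EOVERFLOW)
Resend      : repbuf 8192 bytes, lens 376/4232, same xid 1466802977730748
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;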

&lt;p&gt;Client 1st/original request&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000080:00010000:2.0:1400265787.405960:0:2750:0:(file.c:3998:ll_layout_refresh()) ### lustre: requeue layout lock for file [0x200002340:0x1:0x0](ffff880320dbc678)

00000080:00000001:2.0:1400265787.405962:0:2750:0:(obd_class.h:1691:md_enqueue()) Process entered
00800000:00000001:2.0:1400265787.405963:0:2750:0:(lmv_obd.c:1949:lmv_enqueue()) Process entered
00800000:00000001:2.0:1400265787.405964:0:2750:0:(lmv_obd.c:620:lmv_check_connect()) Process entered
00800000:00000001:2.0:1400265787.405965:0:2750:0:(lmv_obd.c:623:lmv_check_connect()) Process leaving (rc=0 : 0 : 0)
00800000:00000002:2.0:1400265787.405967:0:2750:0:(lmv_obd.c:1956:lmv_enqueue()) ENQUEUE &apos;layout&apos; on [0x200002340:0x1:0x0]
00800000:00000002:2.0:1400265787.405968:0:2750:0:(lmv_obd.c:1963:lmv_enqueue()) ENQUEUE &apos;layout&apos; on [0x200002340:0x1:0x0] -&amp;gt; mds #0
00800000:00000001:2.0:1400265787.405970:0:2750:0:(obd_class.h:1691:md_enqueue()) Process entered
00000002:00000001:2.0:1400265787.405971:0:2750:0:(mdc_locks.c:802:mdc_enqueue()) Process entered
00000002:00000001:2.0:1400265787.405972:0:2750:0:(mdc_locks.c:511:mdc_intent_layout_pack()) Process entered
00000100:00000010:2.0:1400265787.405973:0:2750:0:(client.c:410:ptlrpc_request_cache_alloc()) slab-alloced &apos;req&apos;: 976 at ffff8802faff9400.
00000020:00000040:2.0:1400265787.405974:0:2750:0:(genops.c:1022:class_import_get()) import ffff880328196800 refcount=6 obd=lustre-MDT0000-mdc-ffff880331322400
00010000:00000001:2.0:1400265787.405977:0:2750:0:(ldlm_request.c:766:ldlm_prep_elc_req()) Process entered
00010000:00000001:2.0:1400265787.405978:0:2750:0:(ldlm_request.c:1649:ldlm_prepare_lru_list()) Process entered
00010000:00000001:2.0:1400265787.405980:0:2750:0:(ldlm_lock.c:211:ldlm_lock_put()) Process entered
00010000:00000001:2.0:1400265787.405981:0:2750:0:(ldlm_lock.c:244:ldlm_lock_put()) Process leaving
00010000:00000001:2.0:1400265787.405982:0:2750:0:(ldlm_request.c:1771:ldlm_prepare_lru_list()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:2.0:1400265787.405984:0:2750:0:(client.c:584:__ptlrpc_request_bufs_pack()) Process entered
02000000:00000001:2.0:1400265787.405985:0:2750:0:(sec.c:416:sptlrpc_req_get_ctx()) Process entered
02000000:00000001:2.0:1400265787.405986:0:2750:0:(sec.c:434:sptlrpc_req_get_ctx()) Process leaving (rc=0 : 0 : 0)
02000000:00000010:2.0:1400265787.405988:0:2750:0:(sec_null.c:174:null_alloc_reqbuf()) kmalloced &apos;req-&amp;gt;rq_reqbuf&apos;: 512 at ffff8802f7eb1800.
00000100:00000001:2.0:1400265787.405990:0:2750:0:(client.c:638:__ptlrpc_request_bufs_pack()) Process leaving (rc=0 : 0 : 0)
00010000:00000001:2.0:1400265787.405991:0:2750:0:(ldlm_request.c:1886:ldlm_cli_cancel_list()) Process entered
00010000:00000001:2.0:1400265787.405992:0:2750:0:(ldlm_request.c:1889:ldlm_cli_cancel_list()) Process leaving (rc=0 : 0 : 0)
00010000:00000001:2.0:1400265787.405994:0:2750:0:(ldlm_request.c:1886:ldlm_cli_cancel_list()) Process entered
00010000:00000001:2.0:1400265787.405995:0:2750:0:(ldlm_request.c:1889:ldlm_cli_cancel_list()) Process leaving (rc=0 : 0 : 0)
00010000:00000001:2.0:1400265787.405996:0:2750:0:(ldlm_request.c:817:ldlm_prep_elc_req()) Process leaving (rc=0 : 0 : 0)
00000002:00000001:2.0:1400265787.405998:0:2750:0:(mdc_locks.c:538:mdc_intent_layout_pack()) Process leaving (rc=18446612145115206656 : -131928594344960 : ffff8802faff9400)
00000002:00000001:2.0:1400265787.406000:0:2750:0:(lustre_mdc.h:107:mdc_get_rpc_lock()) Process entered
00010000:00000001:2.0:1400265787.406001:0:2750:0:(ldlm_request.c:875:ldlm_cli_enqueue()) Process entered
00010000:00000001:2.0:1400265787.406002:0:2750:0:(ldlm_lock.c:1596:ldlm_lock_create()) Process entered
00010000:00000001:2.0:1400265787.406003:0:2750:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00010000:00000040:2.0:1400265787.406004:0:2750:0:(ldlm_resource.c:1189:ldlm_resource_getref()) getref res: ffff880325333840 count: 2
00010000:00000001:2.0:1400265787.406006:0:2750:0:(ldlm_lock.c:472:ldlm_lock_new()) Process entered
00010000:00000010:2.0:1400265787.406007:0:2750:0:(ldlm_lock.c:477:ldlm_lock_new()) slab-alloced &apos;lock&apos;: 504 at ffff880320f970c0.
00000020:00000001:2.0:1400265787.406008:0:2750:0:(lustre_handles.c:80:class_handle_hash()) Process entered
00000020:00000040:2.0:1400265787.406009:0:2750:0:(lustre_handles.c:114:class_handle_hash()) added object ffff880320f970c0 with handle 0x5121400b7cdaafb2 to hash
00000020:00000001:2.0:1400265787.406011:0:2750:0:(lustre_handles.c:115:class_handle_hash()) Process leaving
00010000:00000001:2.0:1400265787.406011:0:2750:0:(ldlm_lock.c:515:ldlm_lock_new()) Process leaving (rc=18446612145752338624 : -131927957212992 : ffff880320f970c0)
00010000:00000001:2.0:1400265787.406013:0:2750:0:(ldlm_lock.c:1636:ldlm_lock_create()) Process leaving (rc=18446612145752338624 : -131927957212992 : ffff880320f970c0)
00010000:00000001:2.0:1400265787.406015:0:2750:0:(ldlm_lock.c:274:ldlm_lock_remove_from_lru()) Process entered
00010000:00000001:2.0:1400265787.406016:0:2750:0:(ldlm_lock.c:283:ldlm_lock_remove_from_lru()) Process leaving
00010000:00010000:2.0:1400265787.406017:0:2750:0:(ldlm_lock.c:797:ldlm_lock_addref_internal_nolock()) ### ldlm_lock_addref(CR) ns: lustre-MDT0000-mdc-ffff880331322400 lock: ffff880320f970c0/0x5121400b7cdaafb2 lrc: 3/1,0 mode: --/CR res: [0x200002340:0x1:0x0].0 bits 0x0 rrc: 2 type: IBT flags: 0x10000000000000 nid: local remote: 0x0 expref: -99 pid: 2750 timeout: 0 lvb_type: 3
00010000:00010000:2.0:1400265787.406023:0:2750:0:(ldlm_request.c:926:ldlm_cli_enqueue()) ### client-side enqueue START, flags 1000
 ns: lustre-MDT0000-mdc-ffff880331322400 lock: ffff880320f970c0/0x5121400b7cdaafb2 lrc: 3/1,0 mode: --/CR res: [0x200002340:0x1:0x0].0 bits 0x8 rrc: 2 type: IBT flags: 0x0 nid: local remote: 0x0 expref: -99 pid: 2750 timeout: 0 lvb_type: 3
00010000:00010000:2.0:1400265787.406028:0:2750:0:(ldlm_request.c:988:ldlm_cli_enqueue()) ### sending request ns: lustre-MDT0000-mdc-ffff880331322400 lock: ffff880320f970c0/0x5121400b7cdaafb2 lrc: 3/1,0 mode: --/CR res: [0x200002340:0x1:0x0].0 bits 0x8 rrc: 2 type: IBT flags: 0x0 nid: local remote: 0x0 expref: -99 pid: 2750 timeout: 0 lvb_type: 3
00000100:00000001:2.0:1400265787.406038:0:2750:0:(client.c:2676:ptlrpc_queue_wait()) Process entered
00000100:00000001:2.0:1400265787.406039:0:2750:0:(client.c:859:ptlrpc_prep_set()) Process entered
00000100:00000010:2.0:1400265787.406040:0:2750:0:(client.c:860:ptlrpc_prep_set()) kmalloced &apos;set&apos;: 144 at ffff8803287f55c0.
00000100:00000001:2.0:1400265787.406041:0:2750:0:(client.c:876:ptlrpc_prep_set()) Process leaving (rc=18446612145878554048 : -131927830997568 : ffff8803287f55c0)
00000100:00000001:2.0:1400265787.406043:0:2750:0:(client.c:2605:ptlrpc_request_addref()) Process entered
00000100:00000001:2.0:1400265787.406044:0:2750:0:(client.c:2607:ptlrpc_request_addref()) Process leaving (rc=18446612145115206656 : -131928594344960 : ffff8802faff9400)
00000020:00000001:2.0:1400265787.406046:0:2750:0:(class_obd.c:138:lustre_get_jobid()) Process entered
00000020:00000001:2.0:1400265787.406047:0:2750:0:(class_obd.c:143:lustre_get_jobid()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:2.0:1400265787.406048:0:2750:0:(client.c:2111:ptlrpc_set_wait()) Process entered
00000100:00000001:2.0:1400265787.406049:0:2750:0:(client.c:1415:ptlrpc_send_new_req()) Process entered
00000100:00000040:2.0:1400265787.406051:0:2750:0:(lustre_net.h:3296:ptlrpc_rqphase_move()) @@@ move req &quot;New&quot; -&amp;gt; &quot;Rpc&quot;  req@ffff8802faff9400 x1466802977730748/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff880331322400@10.10.4.17@tcp:12/10 lens 376/1544 e 0 to 0 dl 0 ref 2 fl New:/0/ffffffff rc 0/-1
00000100:00000001:2.0:1400265787.406057:0:2750:0:(client.c:1064:ptlrpc_import_delay_req()) Process entered
00000100:00000001:2.0:1400265787.406058:0:2750:0:(client.c:1115:ptlrpc_import_delay_req()) Process leaving (rc=0 : 0 : 0)
02000000:00000001:2.0:1400265787.406059:0:2750:0:(sec.c:659:sptlrpc_req_refresh_ctx()) Process entered
02000000:00000001:2.0:1400265787.406060:0:2750:0:(sec.c:686:sptlrpc_req_refresh_ctx()) Process leaving (rc=0 : 0 : 0)
00000100:00100000:2.0:1400265787.406062:0:2750:0:(client.c:1478:ptlrpc_send_new_req()) Sending RPC pname:cluuid:pid:xid:nid:opc file:a56bcdbb-ca09-9568-70fd-b3f47d606d12:2750:1466802977730748:10.10.4.17@tcp:101
00000100:00000001:2.0:1400265787.406065:0:2750:0:(niobuf.c:682:ptl_send_rpc()) Process entered
02000000:00000001:2.0:1400265787.406066:0:2750:0:(sec.c:961:sptlrpc_cli_wrap_request()) Process entered
02000000:00000001:2.0:1400265787.406066:0:2750:0:(sec.c:997:sptlrpc_cli_wrap_request()) Process leaving (rc=0 : 0 : 0)
02000000:00000001:2.0:1400265787.406068:0:2750:0:(sec.c:1689:sptlrpc_cli_alloc_repbuf()) Process entered
02000000:00000010:2.0:1400265787.406069:0:2750:0:(sec_null.c:217:null_alloc_repbuf()) kmalloced &apos;req-&amp;gt;rq_repbuf&apos;: 2048 at ffff880308075800.
02000000:00000001:2.0:1400265787.406070:0:2750:0:(sec.c:1699:sptlrpc_cli_alloc_repbuf()) Process leaving (rc=0 : 0 : 0)
00000400:00000010:2.0:1400265787.406072:0:2750:0:(lib-lnet.h:500:lnet_me_alloc()) alloc &apos;(me)&apos;: 96 at ffff880320454dc0 (tot 83133036).
00000400:00000010:2.0:1400265787.406075:0:2750:0:(lib-lnet.h:468:lnet_md_alloc()) alloc &apos;(md)&apos;: 128 at ffff880320454d40 (tot 83133164).
00000100:00000200:2.0:1400265787.406077:0:2750:0:(niobuf.c:808:ptl_send_rpc()) Setup reply buffer: 2048 bytes, xid 1466802977730748, portal 10
00000100:00000001:2.0:1400265787.406079:0:2750:0:(client.c:2605:ptlrpc_request_addref()) Process entered
00000100:00000001:2.0:1400265787.406079:0:2750:0:(client.c:2607:ptlrpc_request_addref()) Process leaving (rc=18446612145115206656 : -131928594344960 : ffff8802faff9400)
00000100:00000040:2.0:1400265787.406082:0:2750:0:(niobuf.c:829:ptl_send_rpc()) @@@ send flg=0  req@ffff8802faff9400 x1466802977730748/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff880331322400@10.10.4.17@tcp:12/10 lens 376/1544 e 0 to 0 dl 1400265794 ref 3 fl Rpc:/0/ffffffff rc 0/-1
00000100:00000001:2.0:1400265787.406088:0:2750:0:(niobuf.c:60:ptl_send_buf()) Process entered
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Server handle+reply&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000100:00100000:0.0:1400265787.402458:0:11385:0:(service.c:1939:ptlrpc_server_handle_req_in()) got req x1466802977730748
00000020:00000001:0.0:1400265787.402460:0:11385:0:(genops.c:741:class_conn2export()) Process entered
00000020:00000040:0.0:1400265787.402460:0:11385:0:(genops.c:753:class_conn2export()) looking for export cookie 0xd169abc5abfab541
00000020:00000001:0.0:1400265787.402462:0:11385:0:(lustre_handles.c:172:class_handle2object()) Process entered
00000020:00000040:0.0:1400265787.402463:0:11385:0:(genops.c:839:class_export_get()) GETting export ffff880212978800 : new refcount 12
00000020:00000001:0.0:1400265787.402464:0:11385:0:(lustre_handles.c:195:class_handle2object()) Process leaving (rc=18446612141216073728 : -131932493477888 : ffff880212978800)
00000020:00000001:0.0:1400265787.402466:0:11385:0:(genops.c:755:class_conn2export()) Process leaving (rc=18446612141216073728 : -131932493477888 : ffff880212978800)
00000100:00000001:0.0:1400265787.402468:0:11385:0:(service.c:1068:ptlrpc_update_export_timer()) Process entered
00000100:00000001:0.0:1400265787.402469:0:11385:0:(service.c:1081:ptlrpc_update_export_timer()) Process leaving
00000020:00000010:0.0:1400265787.402470:0:11385:0:(lu_object.c:1643:keys_init()) kmalloced &apos;ctx-&amp;gt;lc_value&apos;: 320 at ffff880268932c00.
02000000:00000010:0.0:1400265787.402472:0:11385:0:(capa.c:416:lu_capainfo_key_init()) kmalloced &apos;value&apos;: 128 at ffff880212f7cac0.
00000020:00000010:0.0:1400265787.402474:0:11385:0:(lu_ucred.c:53:lu_ucred_key_init()) kmalloced &apos;value&apos;: 72 at ffff880213336bc0.
00000020:00000010:0.0:1400265787.402475:0:11385:0:(tgt_main.c:187:tgt_ses_key_init()) kmalloced &apos;value&apos;: 144 at ffff880215e7a740.
00000100:00000040:0.0:1400265787.402477:0:11385:0:(service.c:1203:ptlrpc_at_set_timer()) armed mdt at +1s
00000100:00000001:0.0:1400265787.402479:0:11385:0:(service.c:1670:ptlrpc_server_request_add()) Process entered
00000100:00000001:0.0:1400265787.402479:0:11385:0:(service.c:1571:ptlrpc_server_hpreq_init()) Process entered
00000100:00000001:0.0:1400265787.402481:0:11385:0:(service.c:1622:ptlrpc_server_hpreq_init()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:0.0:1400265787.402482:0:11385:0:(service.c:1685:ptlrpc_server_request_add()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:0.0:1400265787.402486:0:11385:0:(service.c:2002:ptlrpc_server_handle_req_in()) Process leaving (rc=1 : 1 : 1)
00000400:00000001:0.0:1400265787.402488:0:11385:0:(watchdog.c:433:lc_watchdog_disable()) Process entered
00000400:00000001:0.0:1400265787.402489:0:11385:0:(watchdog.c:441:lc_watchdog_disable()) Process leaving
00000400:00000001:0.0:1400265787.402491:0:11385:0:(watchdog.c:416:lc_watchdog_touch()) Process entered
00000400:00000001:0.0:1400265787.402492:0:11385:0:(watchdog.c:427:lc_watchdog_touch()) Process leaving
00000100:00000001:0.0:1400265787.402493:0:11385:0:(service.c:2025:ptlrpc_server_handle_request()) Process entered
00000100:00000001:0.0:1400265787.402494:0:11385:0:(service.c:1803:ptlrpc_server_request_get()) Process entered
00000100:00100000:0.0:1400265787.402496:0:11385:0:(nrs_fifo.c:182:nrs_fifo_req_get()) NRS start fifo request from 12345-10.10.4.10@tcp, seq: 172
00000100:00000040:0.0:1400265787.402498:0:11385:0:(service.c:1841:ptlrpc_server_request_get()) RPC GETting export ffff880212978800 : new rpc_count 1
00000100:00000001:0.0:1400265787.402499:0:11385:0:(service.c:1843:ptlrpc_server_request_get()) Process leaving (rc=18446612141172062208 : -131932537489408 : ffff88020ff7f800)
00000100:00000040:0.0:1400265787.402502:0:11385:0:(lustre_net.h:3296:ptlrpc_rqphase_move()) @@@ move req &quot;New&quot; -&amp;gt; &quot;Interpret&quot;  req@ffff88020ff7f800 x1466802977730748/t0(0) o101-&amp;gt;a56bcdbb-ca09-9568-70fd-b3f47d606d12@10.10.4.10@tcp:0/0 lens 376/0 e 0 to 0 dl 1400265793 ref 1 fl New:/0/ffffffff rc 0/-1
00000100:00000001:0.0:1400265787.402508:0:11385:0:(service.c:1068:ptlrpc_update_export_timer()) Process entered
00000100:00000001:0.0:1400265787.402509:0:11385:0:(service.c:1081:ptlrpc_update_export_timer()) Process leaving
00000100:00100000:0.0:1400265787.402511:0:11385:0:(service.c:2086:ptlrpc_server_handle_request()) Handling RPC pname:cluuid+ref:pid:xid:nid:opc mdt00_002:a56bcdbb-ca09-9568-70fd-b3f47d606d12+12:2750:x1466802977730748:12345-10.10.4.10@tcp:101
00000100:00000200:0.0:1400265787.402513:0:11385:0:(service.c:2091:ptlrpc_server_handle_request()) got req 1466802977730748
00000020:00000001:0.0:1400265787.402515:0:11385:0:(tgt_handler.c:589:tgt_request_handle()) Process entered
00000020:00000001:0.0:1400265787.402516:0:11385:0:(tgt_handler.c:553:tgt_handler_find_check()) Process entered
00000020:00000001:0.0:1400265787.402517:0:11385:0:(tgt_handler.c:575:tgt_handler_find_check()) Process leaving (rc=18446744072111134752 : -1598416864 : ffffffffa0ba1820)
00000020:00000001:0.0:1400265787.402519:0:11385:0:(tgt_handler.c:492:tgt_handle_recovery()) Process entered
00000020:00000001:0.0:1400265787.402520:0:11385:0:(tgt_handler.c:542:tgt_handle_recovery()) Process leaving (rc=1 : 1 : 1)
00000020:00000001:0.0:1400265787.402522:0:11385:0:(tgt_handler.c:377:tgt_handle_request0()) Process entered
00000020:00000001:0.0:1400265787.402522:0:11385:0:(tgt_handler.c:307:tgt_request_preprocess()) Process entered
00000020:00000001:0.0:1400265787.402524:0:11385:0:(tgt_handler.c:362:tgt_request_preprocess()) Process leaving (rc=0 : 0 : 0)
00000020:00000001:0.0:1400265787.402525:0:11385:0:(tgt_handler.c:1220:tgt_enqueue()) Process entered
00010000:00000001:0.0:1400265787.402526:0:11385:0:(ldlm_lockd.c:1165:ldlm_handle_enqueue0()) Process entered
00010000:00010000:0.0:1400265787.402527:0:11385:0:(ldlm_lockd.c:1167:ldlm_handle_enqueue0()) ### server-side enqueue handler START
00010000:00000001:0.0:1400265787.402528:0:11385:0:(ldlm_lockd.c:1556:ldlm_request_cancel()) Process entered
00010000:00000001:0.0:1400265787.402529:0:11385:0:(ldlm_lockd.c:1560:ldlm_request_cancel()) Process leaving (rc=0 : 0 : 0)
00010000:00000001:0.0:1400265787.402531:0:11385:0:(ldlm_lock.c:1596:ldlm_lock_create()) Process entered
00010000:00000001:0.0:1400265787.402532:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00010000:00000040:0.0:1400265787.402534:0:11385:0:(ldlm_resource.c:1189:ldlm_resource_getref()) getref res: ffff8802bdc23a80 count: 2
00010000:00000001:0.0:1400265787.402535:0:11385:0:(ldlm_lock.c:472:ldlm_lock_new()) Process entered
00010000:00000010:0.0:1400265787.402537:0:11385:0:(ldlm_lock.c:477:ldlm_lock_new()) slab-alloced &apos;lock&apos;: 504 at ffff880212a98ac0.
00000020:00000001:0.0:1400265787.402538:0:11385:0:(lustre_handles.c:80:class_handle_hash()) Process entered
00000020:00000040:0.0:1400265787.402539:0:11385:0:(lustre_handles.c:114:class_handle_hash()) added object ffff880212a98ac0 with handle 0xd169abc5abfab66e to hash
00000020:00000001:0.0:1400265787.402541:0:11385:0:(lustre_handles.c:115:class_handle_hash()) Process leaving
00010000:00000001:0.0:1400265787.402542:0:11385:0:(ldlm_lock.c:515:ldlm_lock_new()) Process leaving (rc=18446612141217254080 : -131932492297536 : ffff880212a98ac0)
00010000:00000001:0.0:1400265787.402544:0:11385:0:(ldlm_lock.c:1636:ldlm_lock_create()) Process leaving (rc=18446612141217254080 : -131932492297536 : ffff880212a98ac0)
00010000:00010000:0.0:1400265787.402546:0:11385:0:(ldlm_lockd.c:1258:ldlm_handle_enqueue0()) ### server-side enqueue handler, new lock created ns: mdt-lustre-MDT0000_UUID lock: ffff880212a98ac0/0xd169abc5abfab66e lrc: 2/0,0 mode: --/CR res: [0x200002340:0x1:0x0].0 bits 0x0 rrc: 2 type: IBT flags: 0x40000000000000 nid: local remote: 0x5121400b7cdaafb2 expref: -99 pid: 11385 timeout: 0 lvb_type: 0
00010000:00000040:0.0:1400265787.402552:0:11385:0:(ldlm_lockd.c:1270:ldlm_handle_enqueue0()) lock GETting export ffff880212978800 : new locks_count 9
00000020:00000040:0.0:1400265787.402553:0:11385:0:(genops.c:839:class_export_get()) GETting export ffff880212978800 : new refcount 13
00010000:00000001:0.0:1400265787.402555:0:11385:0:(ldlm_lock.c:1666:ldlm_lock_enqueue()) Process entered
00000004:00000001:0.0:1400265787.402556:0:11385:0:(mdt_handler.c:3497:mdt_intent_policy()) Process entered
00000004:00000001:0.0:1400265787.402558:0:11385:0:(mdt_handler.c:3434:mdt_intent_opc()) Process entered
00000004:00000001:0.0:1400265787.402559:0:11385:0:(mdt_handler.c:2735:mdt_unpack_req_pack_rep()) Process entered
00000004:00000001:0.0:1400265787.402560:0:11385:0:(mdt_handler.c:2753:mdt_unpack_req_pack_rep()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1400265787.402562:0:11385:0:(mdt_handler.c:3247:mdt_intent_layout()) Process entered
00000004:00000001:0.0:1400265787.402562:0:11385:0:(mdt_handler.c:2181:mdt_object_find()) Process entered
00000004:00000040:0.0:1400265787.402563:0:11385:0:(mdt_handler.c:2183:mdt_object_find()) Find object for [0x200002340:0x1:0x0]
00000020:00000001:0.0:1400265787.402565:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00000020:00000001:0.0:1400265787.402566:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00000004:00000001:0.0:1400265787.402568:0:11385:0:(mdt_handler.c:2190:mdt_object_find()) Process leaving (rc=18446612141272470360 : -131932437081256 : ffff880215f41358)
00000004:00000001:0.0:1400265787.402570:0:11385:0:(mdd_object.c:238:mdd_xattr_get()) Process entered
00000004:00000001:0.0:1400265787.402572:0:11385:0:(lod_object.c:498:lod_xattr_get()) Process entered
00000004:00000001:0.0:1400265787.402575:0:11385:0:(lod_object.c:502:lod_xattr_get()) Process leaving (rc=3728 : 3728 : e90)
00000004:00000001:0.0:1400265787.402577:0:11385:0:(mdd_object.c:257:mdd_xattr_get()) Process leaving (rc=3728 : 3728 : e90)
00000004:00000001:0.0:1400265787.402579:0:11385:0:(mdt_internal.h:559:mdt_object_put()) Process entered
00000020:00000001:0.0:1400265787.402579:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00000004:00000001:0.0:1400265787.402581:0:11385:0:(mdt_internal.h:561:mdt_object_put()) Process leaving
00000100:00000001:0.0:1400265787.402582:0:11385:0:(pack_generic.c:337:lustre_pack_reply_v2()) Process entered
02000000:00000001:0.0:1400265787.402583:0:11385:0:(sec.c:2113:sptlrpc_svc_alloc_rs()) Process entered
02000000:00000010:0.0:1400265787.402587:0:11385:0:(sec_null.c:320:null_alloc_rs()) kmalloced &apos;rs&apos;: 4336 at ffff88020f756000.
02000000:00000001:0.0:1400265787.402588:0:11385:0:(sec.c:2149:sptlrpc_svc_alloc_rs()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:0.0:1400265787.402590:0:11385:0:(pack_generic.c:371:lustre_pack_reply_v2()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1400265787.402592:0:11385:0:(mdt_handler.c:3284:mdt_intent_layout()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1400265787.402593:0:11385:0:(mdt_handler.c:3483:mdt_intent_opc()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1400265787.402594:0:11385:0:(mdt_handler.c:3536:mdt_intent_policy()) Process leaving (rc=0 : 0 : 0)
00010000:00000001:0.0:1400265787.402596:0:11385:0:(ldlm_inodebits.c:188:ldlm_process_inodebits_lock()) Process entered
00010000:00000001:0.0:1400265787.402597:0:11385:0:(ldlm_inodebits.c:90:ldlm_inodebits_compat_queue()) Process entered
00010000:00000001:0.0:1400265787.402598:0:11385:0:(ldlm_inodebits.c:163:ldlm_inodebits_compat_queue()) Process leaving (rc=1 : 1 : 1)
00010000:00000001:0.0:1400265787.402600:0:11385:0:(ldlm_inodebits.c:90:ldlm_inodebits_compat_queue()) Process entered
00010000:00000001:0.0:1400265787.402601:0:11385:0:(ldlm_inodebits.c:163:ldlm_inodebits_compat_queue()) Process leaving (rc=1 : 1 : 1)
00010000:00000001:0.0:1400265787.402602:0:11385:0:(ldlm_lock.c:1146:ldlm_grant_lock()) Process entered
00010000:00000001:0.0:1400265787.402603:0:11385:0:(ldlm_lock.c:1123:ldlm_grant_lock_with_skiplist()) Process entered
00010000:00000001:0.0:1400265787.402604:0:11385:0:(ldlm_lock.c:1003:search_granted_lock()) Process entered
00010000:00000001:0.0:1400265787.402605:0:11385:0:(ldlm_lock.c:1074:search_granted_lock()) Process leaving
00010000:00000001:0.0:1400265787.402605:0:11385:0:(ldlm_lock.c:1086:ldlm_granted_list_add_lock()) Process entered
00010000:00000040:0.0:1400265787.402606:0:11385:0:(ldlm_resource.c:1414:ldlm_resource_dump()) --- Resource: [0x200002340:0x1:0x0].0 (ffff8802bdc23a80) refcount = 2
00010000:00000040:0.0:1400265787.402608:0:11385:0:(ldlm_resource.c:1417:ldlm_resource_dump()) Granted locks (in reverse order):
00010000:00000040:0.0:1400265787.402610:0:11385:0:(ldlm_resource.c:1420:ldlm_resource_dump()) ### ### ns: mdt-lustre-MDT0000_UUID lock: ffff880212a982c0/0xd169abc5abfab64b lrc: 2/0,0 mode: PR/PR res: [0x200002340:0x1:0x0].0 bits 0x2 rrc: 2 type: IBT flags: 0x40200000000000 nid: 10.10.4.10@tcp remote: 0x5121400b7cdaafa4 expref: 13 pid: 11385 timeout: 0 lvb_type: 0
00010000:00010000:0.0:1400265787.402617:0:11385:0:(ldlm_lock.c:1091:ldlm_granted_list_add_lock()) ### About to add lock: ns: mdt-lustre-MDT0000_UUID lock: ffff880212a98ac0/0xd169abc5abfab66e lrc: 3/0,0 mode: CR/CR res: [0x200002340:0x1:0x0].0 bits 0x8 rrc: 2 type: IBT flags: 0x50000000000000 nid: 10.10.4.10@tcp remote: 0x5121400b7cdaafb2 expref: 13 pid: 11385 timeout: 0 lvb_type: 3
00010000:00000001:0.0:1400265787.402622:0:11385:0:(ldlm_lock.c:1113:ldlm_granted_list_add_lock()) Process leaving
00010000:00000001:0.0:1400265787.402623:0:11385:0:(ldlm_lock.c:1129:ldlm_grant_lock_with_skiplist()) Process leaving
00010000:00000001:0.0:1400265787.402624:0:11385:0:(ldlm_pool.c:345:ldlm_srv_pool_recalc()) Process entered
00010000:00000001:0.0:1400265787.402625:0:11385:0:(ldlm_pool.c:349:ldlm_srv_pool_recalc()) Process leaving (rc=0 : 0 : 0)
00010000:00000001:0.0:1400265787.402626:0:11385:0:(ldlm_lock.c:1165:ldlm_grant_lock()) Process leaving
00010000:00000001:0.0:1400265787.402627:0:11385:0:(ldlm_inodebits.c:238:ldlm_process_inodebits_lock()) Process leaving (rc=0 : 0 : 0)
00010000:00000001:0.0:1400265787.402628:0:11385:0:(ldlm_lock.c:1762:ldlm_lock_enqueue()) Process leaving via out (rc=0 : 0 : 0x0)
00010000:00000001:0.0:1400265787.402630:0:11385:0:(ldlm_lockd.c:1380:ldlm_handle_enqueue0()) Process leaving
00010000:00010000:0.0:1400265787.402632:0:11385:0:(ldlm_lockd.c:1393:ldlm_handle_enqueue0()) ### server-side enqueue handler, sending reply(err=0, rc=0) ns: mdt-lustre-MDT0000_UUID lock: ffff880212a98ac0/0xd169abc5abfab66e lrc: 3/0,0 mode: CR/CR res: [0x200002340:0x1:0x0].0 bits 0x8 rrc: 2 type: IBT flags: 0x40000000000000 nid: 10.10.4.10@tcp remote: 0x5121400b7cdaafb2 expref: 13 pid: 11385 timeout: 0 lvb_type: 3
00000004:00000001:0.0:1400265787.402639:0:11385:0:(mdt_lvb.c:104:mdt_lvbo_fill()) Process entered
00000020:00000010:0.0:1400265787.402640:0:11385:0:(lu_object.c:1643:keys_init()) kmalloced &apos;ctx-&amp;gt;lc_value&apos;: 320 at ffff880212baaa00.
00000020:00000010:0.0:1400265787.402642:0:11385:0:(lu_object.c:429:lu_global_key_init()) kmalloced &apos;value&apos;: 512 at ffff8801f657ce00.
00000020:00000010:0.0:1400265787.402645:0:11385:0:(dt_object.c:55:dt_global_key_init()) kmalloced &apos;value&apos;: 1216 at ffff88032a027000.
00000040:00000010:0.0:1400265787.402647:0:11385:0:(llog_obd.c:278:llog_key_init()) kmalloced &apos;value&apos;: 224 at ffff8802b9611ac0.
00000020:00000010:0.0:1400265787.402649:0:11385:0:(tgt_main.c:146:tgt_key_init()) kmalloced &apos;value&apos;: 1152 at ffff8803277a9800.
80000000:00000010:0.0:1400265787.402651:0:11385:0:(fld_handler.c:60:fld_key_init()) kmalloced &apos;value&apos;: 72 at ffff880212e4a440.
40000000:00000010:0.0:1400265787.402653:0:11385:0:(fid_handler.c:406:seq_key_init()) kmalloced &apos;value&apos;: 48 at ffff8802128a1a00.
00040000:00000010:0.0:1400265787.402656:0:11385:0:(lquota_lib.c:53:lquota_key_init()) kmalloced &apos;value&apos;: 240 at ffff88029dd554c0.
00040000:00000010:0.0:1400265787.402659:0:11385:0:(qmt_dev.c:336:qmt_key_init()) kmalloced &apos;value&apos;: 376 at ffff8801f67b2e00.
00040000:00000010:0.0:1400265787.402661:0:11385:0:(qsd_lib.c:71:qsd_key_init()) kmalloced &apos;value&apos;: 376 at ffff8801f67b2600.
00000004:00000010:0.0:1400265787.402665:0:11385:0:(osd_handler.c:5503:osd_key_init()) kmalloced &apos;info&apos;: 4400 at ffff88020f758000.
00000004:00000010:0.0:1400265787.402668:0:11385:0:(osd_handler.c:5507:osd_key_init()) kmalloced &apos;info-&amp;gt;oti_it_ea_buf&apos;: 5120 at ffff88020f75a000.
00100000:00000010:0.0:1400265787.402671:0:11385:0:(lfsck_lib.c:46:lfsck_key_init()) kmalloced &apos;value&apos;: 1144 at ffff88020f723800.
00000004:00000010:0.0:1400265787.402673:0:11385:0:(mdt_handler.c:5817:mdt_key_init()) kmalloced &apos;value&apos;: 1464 at ffff88020f723000.
00000004:00000010:0.0:1400265787.402675:0:11385:0:(lod_dev.c:804:lod_key_init()) kmalloced &apos;value&apos;: 336 at ffff8801f67b2c00.
00000004:00000010:0.0:1400265787.402678:0:11385:0:(mdd_device.c:1553:mdd_key_init()) kmalloced &apos;value&apos;: 1504 at ffff88020f724800.
00000004:00000010:0.0:1400265787.402680:0:11385:0:(osp_dev.c:1171:osp_key_init()) kmalloced &apos;value&apos;: 568 at ffff880212882800.
00000004:00000001:0.0:1400265787.402682:0:11385:0:(mdt_handler.c:2181:mdt_object_find()) Process entered
00000004:00000040:0.0:1400265787.402683:0:11385:0:(mdt_handler.c:2183:mdt_object_find()) Find object for [0x200002340:0x1:0x0]
00000020:00000001:0.0:1400265787.402684:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00000020:00000001:0.0:1400265787.402686:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00000004:00000001:0.0:1400265787.402688:0:11385:0:(mdt_handler.c:2190:mdt_object_find()) Process leaving (rc=18446612141272470360 : -131932437081256 : ffff880215f41358)
00000004:00000001:0.0:1400265787.402689:0:11385:0:(mdd_object.c:238:mdd_xattr_get()) Process entered
00000004:00000001:0.0:1400265787.402690:0:11385:0:(lod_object.c:498:lod_xattr_get()) Process entered
00000004:00000001:0.0:1400265787.402692:0:11385:0:(lod_object.c:502:lod_xattr_get()) Process leaving (rc=3728 : 3728 : e90)
00000004:00000001:0.0:1400265787.402694:0:11385:0:(mdd_object.c:257:mdd_xattr_get()) Process leaving (rc=3728 : 3728 : e90)
00000004:00000001:0.0:1400265787.402695:0:11385:0:(mdd_object.c:238:mdd_xattr_get()) Process entered
00000004:00000001:0.0:1400265787.402696:0:11385:0:(lod_object.c:498:lod_xattr_get()) Process entered
00000004:00000001:0.0:1400265787.402698:0:11385:0:(lod_object.c:502:lod_xattr_get()) Process leaving (rc=3728 : 3728 : e90)
00000004:00000001:0.0:1400265787.402699:0:11385:0:(mdd_object.c:257:mdd_xattr_get()) Process leaving (rc=3728 : 3728 : e90)
00000004:00000001:0.0:1400265787.402700:0:11385:0:(mdt_internal.h:559:mdt_object_put()) Process entered
00000020:00000001:0.0:1400265787.402701:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00000004:00000001:0.0:1400265787.402703:0:11385:0:(mdt_internal.h:561:mdt_object_put()) Process leaving
00000020:00000010:0.0:1400265787.402704:0:11385:0:(lu_object.c:429:lu_global_key_fini()) kfreed &apos;info&apos;: 512 at ffff8801f657ce00.
00000020:00000010:0.0:1400265787.402706:0:11385:0:(dt_object.c:56:dt_global_key_fini()) kfreed &apos;info&apos;: 1216 at ffff88032a027000.
00000040:00000010:0.0:1400265787.402708:0:11385:0:(llog_obd.c:278:llog_key_fini()) kfreed &apos;info&apos;: 224 at ffff8802b9611ac0.
00000020:00000010:0.0:1400265787.402710:0:11385:0:(tgt_main.c:163:tgt_key_fini()) kfreed &apos;info&apos;: 1152 at ffff8803277a9800.
80000000:00000010:0.0:1400265787.402712:0:11385:0:(fld_handler.c:60:fld_key_fini()) kfreed &apos;info&apos;: 72 at ffff880212e4a440.
40000000:00000010:0.0:1400265787.402714:0:11385:0:(fid_handler.c:406:seq_key_fini()) kfreed &apos;info&apos;: 48 at ffff8802128a1a00.
00040000:00000010:0.0:1400265787.402716:0:11385:0:(lquota_lib.c:53:lquota_key_fini()) kfreed &apos;info&apos;: 240 at ffff88029dd554c0.
00040000:00000010:0.0:1400265787.402717:0:11385:0:(qmt_dev.c:336:qmt_key_fini()) kfreed &apos;info&apos;: 376 at ffff8801f67b2e00.
00040000:00000010:0.0:1400265787.402719:0:11385:0:(qsd_lib.c:71:qsd_key_fini()) kfreed &apos;info&apos;: 376 at ffff8801f67b2600.
00000004:00000010:0.0:1400265787.402721:0:11385:0:(osd_handler.c:5533:osd_key_fini()) kfreed &apos;info-&amp;gt;oti_it_ea_buf&apos;: 5120 at ffff88020f75a000.
00000004:00000010:0.0:1400265787.402723:0:11385:0:(osd_handler.c:5536:osd_key_fini()) kfreed &apos;info&apos;: 4400 at ffff88020f758000.
00100000:00000010:0.0:1400265787.402725:0:11385:0:(lfsck_lib.c:55:lfsck_key_fini()) kfreed &apos;info&apos;: 1144 at ffff88020f723800.
00000004:00000010:0.0:1400265787.402727:0:11385:0:(mdt_handler.c:5829:mdt_key_fini()) kfreed &apos;info&apos;: 1464 at ffff88020f723000.
00000004:00000010:0.0:1400265787.402729:0:11385:0:(lod_dev.c:819:lod_key_fini()) kfreed &apos;info&apos;: 336 at ffff8801f67b2c00.
00000004:00000010:0.0:1400265787.402731:0:11385:0:(mdd_device.c:1563:mdd_key_fini()) kfreed &apos;info&apos;: 1504 at ffff88020f724800.
00000004:00000010:0.0:1400265787.402732:0:11385:0:(osp_dev.c:1171:osp_key_fini()) kfreed &apos;info&apos;: 568 at ffff880212882800.
00000020:00000010:0.0:1400265787.402734:0:11385:0:(lu_object.c:1593:keys_fini()) kfreed &apos;ctx-&amp;gt;lc_value&apos;: 320 at ffff880212baaa00.
00000004:00000001:0.0:1400265787.402735:0:11385:0:(mdt_lvb.c:179:mdt_lvbo_fill()) Process leaving (rc=3728 : 3728 : e90)
00010000:00000001:0.0:1400265787.402737:0:11385:0:(ldlm_lock.c:2072:ldlm_reprocess_all()) Process entered
00010000:00000001:0.0:1400265787.402738:0:11385:0:(ldlm_lock.c:1793:ldlm_reprocess_queue()) Process entered
00010000:00000001:0.0:1400265787.402739:0:11385:0:(ldlm_lock.c:1812:ldlm_reprocess_queue()) Process leaving (rc=1 : 1 : 1)
00010000:00000001:0.0:1400265787.402740:0:11385:0:(ldlm_lock.c:1793:ldlm_reprocess_queue()) Process entered
00010000:00000001:0.0:1400265787.402741:0:11385:0:(ldlm_lock.c:1812:ldlm_reprocess_queue()) Process leaving (rc=1 : 1 : 1)
00010000:00000001:0.0:1400265787.402742:0:11385:0:(ldlm_lock.c:1976:ldlm_run_ast_work()) Process leaving (rc=0 : 0 : 0)
00010000:00000001:0.0:1400265787.402743:0:11385:0:(ldlm_lock.c:2100:ldlm_reprocess_all()) Process leaving
00010000:00000001:0.0:1400265787.402744:0:11385:0:(ldlm_lock.c:211:ldlm_lock_put()) Process entered
00010000:00000001:0.0:1400265787.402745:0:11385:0:(ldlm_lock.c:244:ldlm_lock_put()) Process leaving
00010000:00010000:0.0:1400265787.402746:0:11385:0:(ldlm_lockd.c:1436:ldlm_handle_enqueue0()) ### server-side enqueue handler END (lock ffff880212a98ac0, rc 0)
00000020:00000001:0.0:1400265787.402747:0:11385:0:(tgt_handler.c:1231:tgt_enqueue()) Process leaving (rc=0 : 0 : 0)
00010000:00000040:0.0:1400265787.402749:0:11385:0:(ldlm_lib.c:2363:target_committed_to_req()) last_committed 21474836483, transno 0, xid 1466802977730748
00010000:00000001:0.0:1400265787.402750:0:11385:0:(ldlm_lib.c:2422:target_send_reply()) Process entered
00010000:00000200:0.0:1400265787.402753:0:11385:0:(ldlm_lib.c:2410:target_send_reply_msg()) @@@ sending reply  req@ffff88020ff7f800 x1466802977730748/t0(0) o101-&amp;gt;a56bcdbb-ca09-9568-70fd-b3f47d606d12@10.10.4.10@tcp:0/0 lens 376/4072 e 0 to 0 dl 1400265793 ref 1 fl Interpret:/0/0 rc 0/0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Reply truncated at Client side, request is resent&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000100:00000001:0.0:1400265787.406980:0:7821:0:(events.c:97:reply_in_callback()) Process entered
00000100:00000200:0.0:1400265787.406983:0:7821:0:(events.c:99:reply_in_callback()) @@@ type 2, status 0  req@ffff8802faff9400 x1466802977730748/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff880331322400@10.10.4.17@tcp:12/10 lens 376/1544 e 0 to 0 dl 1400265794 ref 2 fl Rpc:/0/ffffffff rc 0/-1
00000100:00100000:0.0:1400265787.406989:0:7821:0:(events.c:126:reply_in_callback()) truncate req ffff8802faff9400 rpc 1544 - 4040+192
00000100:00000001:0.0:1400265787.406993:0:7821:0:(events.c:173:reply_in_callback()) Process leaving
00000400:00000010:0.0:1400265787.406995:0:7821:0:(lib-lnet.h:528:lnet_msg_free()) kfreed &apos;msg&apos;: 352 at ffff8802f7eb7400 (tot 83132940).
00000800:00000001:0.0:1400265787.406997:0:7821:0:(socklnd_cb.c:346:ksocknal_receive()) Process entered
00000800:00000001:0.0:1400265787.406999:0:7821:0:(socklnd_cb.c:384:ksocknal_receive()) Process leaving (rc=1 : 1 : 1)
00000800:00000001:0.1:1400265787.407011:0:7821:0:(socklnd_lib-linux.c:1002:ksocknal_data_ready()) Process entered
00000800:00000001:0.1:1400265787.407013:0:7821:0:(socklnd_cb.c:1566:ksocknal_read_callback()) Process entered
00000800:00000001:0.1:1400265787.407014:0:7821:0:(socklnd_cb.c:1585:ksocknal_read_callback()) Process leaving
00000800:00000001:0.1:1400265787.407015:0:7821:0:(socklnd_lib-linux.c:1017:ksocknal_data_ready()) Process leaving
00000800:00000001:0.0:1400265787.407017:0:7821:0:(socklnd_cb.c:346:ksocknal_receive()) Process entered
00000800:00000001:0.0:1400265787.407020:0:7821:0:(socklnd_cb.c:384:ksocknal_receive()) Process leaving (rc=1 : 1 : 1)
00000800:00000001:0.0:1400265787.407021:0:7821:0:(socklnd_cb.c:346:ksocknal_receive()) Process entered
00000800:00000001:0.0:1400265787.407023:0:7821:0:(socklnd_cb.c:384:ksocknal_receive()) Process leaving (rc=1 : 1 : 1)
00000100:00000001:2.0:1400265787.407086:0:2750:0:(client.c:1525:ptlrpc_check_set()) Process entered
00000400:00000001:2.0:1400265787.407089:0:2750:0:(lib-msg.c:48:lnet_build_unlink_event()) Process entered
00000400:00000001:2.0:1400265787.407090:0:2750:0:(lib-msg.c:57:lnet_build_unlink_event()) Process leaving
00000100:00000001:2.0:1400265787.407091:0:2750:0:(events.c:97:reply_in_callback()) Process entered
00000100:00000200:2.0:1400265787.407095:0:2750:0:(events.c:99:reply_in_callback()) @@@ type 6, status 0  req@ffff8802faff9400 x1466802977730748/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff880331322400@10.10.4.17@tcp:12/10 lens 376/1544 e 0 to 0 dl 1400265794 ref 2 fl Rpc:R/0/ffffffff rc -75/-1
00000100:00000200:2.0:1400265787.407103:0:2750:0:(events.c:120:reply_in_callback()) @@@ unlink  req@ffff8802faff9400 x1466802977730748/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff880331322400@10.10.4.17@tcp:12/10 lens 376/1544 e 0 to 0 dl 1400265794 ref 2 fl Rpc:R/0/ffffffff rc -75/-1
00000100:00000001:2.0:1400265787.407109:0:2750:0:(events.c:173:reply_in_callback()) Process leaving
00000400:00000010:2.0:1400265787.407110:0:2750:0:(lib-lnet.h:508:lnet_me_free()) kfreed &apos;me&apos;: 96 at ffff880320454dc0 (tot 83132844).
00000400:00000200:2.0:1400265787.407112:0:2750:0:(lib-md.c:73:lnet_md_unlink()) Unlinking md ffff880320454d40
00000400:00000010:2.0:1400265787.407114:0:2750:0:(lib-lnet.h:491:lnet_md_free()) kfreed &apos;md&apos;: 128 at ffff880320454d40 (tot 83132716).
00000100:00000001:2.0:1400265787.407115:0:2750:0:(client.c:2389:ptlrpc_unregister_reply()) Process leaving (rc=1 : 1 : 1)
00000100:00000001:2.0:1400265787.407117:0:2750:0:(client.c:1220:after_reply()) Process entered
02000000:00000001:2.0:1400265787.407118:0:2750:0:(sec.c:1710:sptlrpc_cli_free_repbuf()) Process entered
02000000:00000010:2.0:1400265787.407119:0:2750:0:(sec_null.c:231:null_free_repbuf()) kfreed &apos;req-&amp;gt;rq_repbuf&apos;: 2048 at ffff880308075800.
02000000:00000001:2.0:1400265787.407121:0:2750:0:(sec.c:1724:sptlrpc_cli_free_repbuf()) Process leaving
00000100:00000001:2.0:1400265787.407124:0:2750:0:(client.c:1244:after_reply()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:2.0:1400265787.407125:0:2750:0:(client.c:1886:ptlrpc_check_set()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:2.0:1400265787.407126:0:2750:0:(client.c:1525:ptlrpc_check_set()) Process entered
00000100:00000001:2.0:1400265787.407127:0:2750:0:(client.c:2381:ptlrpc_unregister_reply()) Process leaving (rc=1 : 1 : 1)
00000100:00000001:2.0:1400265787.407129:0:2750:0:(client.c:1064:ptlrpc_import_delay_req()) Process entered
00000100:00000001:2.0:1400265787.407129:0:2750:0:(client.c:1115:ptlrpc_import_delay_req()) Process leaving (rc=0 : 0 : 0)
02000000:00000001:2.0:1400265787.407131:0:2750:0:(sec.c:659:sptlrpc_req_refresh_ctx()) Process entered
02000000:00000001:2.0:1400265787.407132:0:2750:0:(sec.c:686:sptlrpc_req_refresh_ctx()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:2.0:1400265787.407133:0:2750:0:(niobuf.c:682:ptl_send_rpc()) Process entered
02000000:00000001:2.0:1400265787.407135:0:2750:0:(sec.c:961:sptlrpc_cli_wrap_request()) Process entered
02000000:00000001:2.0:1400265787.407136:0:2750:0:(sec.c:997:sptlrpc_cli_wrap_request()) Process leaving (rc=0 : 0 : 0)
02000000:00000001:2.0:1400265787.407137:0:2750:0:(sec.c:1689:sptlrpc_cli_alloc_repbuf()) Process entered
02000000:00000010:2.0:1400265787.407139:0:2750:0:(sec_null.c:217:null_alloc_repbuf()) kmalloced &apos;req-&amp;gt;rq_repbuf&apos;: 8192 at ffff8803006c2000.
02000000:00000001:2.0:1400265787.407141:0:2750:0:(sec.c:1699:sptlrpc_cli_alloc_repbuf()) Process leaving (rc=0 : 0 : 0)
00000400:00000010:2.0:1400265787.407142:0:2750:0:(lib-lnet.h:500:lnet_me_alloc()) alloc &apos;(me)&apos;: 96 at ffff880320454d40 (tot 83132812).
00000400:00000010:2.0:1400265787.407145:0:2750:0:(lib-lnet.h:468:lnet_md_alloc()) alloc &apos;(md)&apos;: 128 at ffff880320454dc0 (tot 83132940).
00000100:00000200:2.0:1400265787.407147:0:2750:0:(niobuf.c:808:ptl_send_rpc()) Setup reply buffer: 8192 bytes, xid 1466802977730748, portal 10
00000100:00000001:2.0:1400265787.407149:0:2750:0:(client.c:2605:ptlrpc_request_addref()) Process entered
00000100:00000001:2.0:1400265787.407150:0:2750:0:(client.c:2607:ptlrpc_request_addref()) Process leaving (rc=18446612145115206656 : -131928594344960 : ffff8802faff9400)
00000100:00000040:2.0:1400265787.407153:0:2750:0:(niobuf.c:829:ptl_send_rpc()) @@@ send flg=2  req@ffff8802faff9400 x1466802977730748/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff880331322400@10.10.4.17@tcp:12/10 lens 376/4232 e 0 to 0 dl 1400265794 ref 3 fl Rpc:/2/ffffffff rc 0/-1
00000100:00000001:2.0:1400265787.407159:0:2750:0:(niobuf.c:60:ptl_send_buf()) Process entered
00000100:00000040:2.0:1400265787.407161:0:2750:0:(niobuf.c:64:ptl_send_buf()) conn=ffff8803282dacc0 id 12345-10.10.4.17@tcp
00000400:00000010:2.0:1400265787.407162:0:2750:0:(lib-lnet.h:468:lnet_md_alloc()) alloc &apos;(md)&apos;: 128 at ffff8802f8c444c0 (tot 83133068).
00000100:00000200:2.0:1400265787.407164:0:2750:0:(niobuf.c:86:ptl_send_buf()) Sending 376 bytes to portal 12, xid 1466802977730748, offset 0
00000400:00000010:2.0:1400265787.407166:0:2750:0:(lib-lnet.h:517:lnet_msg_alloc()) alloc &apos;(msg)&apos;: 352 at ffff8802f7244000 (tot 83133420).
00000400:00000200:2.0:1400265787.407169:0:2750:0:(lib-move.c:2176:LNetPut()) LNetPut -&amp;gt; 12345-10.10.4.17@tcp
00000800:00000200:2.0:1400265787.407173:0:2750:0:(socklnd_cb.c:947:ksocknal_send()) sending 376 bytes in 1 frags to 12345-10.10.4.17@tcp
00000800:00000010:2.0:1400265787.407175:0:2750:0:(socklnd_cb.c:51:ksocknal_alloc_tx()) alloc &apos;(tx)&apos;: 232 at ffff880310ae9180 (tot 83133652).
00000800:00000200:2.0:1400265787.407178:0:2750:0:(socklnd.c:199:ksocknal_find_peer_locked()) got peer [ffff880338348380] -&amp;gt; 12345-10.10.4.17@tcp (5)
00000800:00000200:2.0:1400265787.407182:0:2750:0:(socklnd_cb.c:706:ksocknal_queue_tx_locked()) Sending to 12345-10.10.4.17@tcp ip 10.10.4.17:988
00000800:00000200:2.0:1400265787.407184:0:2750:0:(socklnd_cb.c:725:ksocknal_queue_tx_locked()) Packet ffff880310ae9180 type 1, nob 472 niov 2 nkiov 0
00000100:00000001:2.0:1400265787.407189:0:2750:0:(niobuf.c:101:ptl_send_buf()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:2.0:1400265787.407191:0:2750:0:(niobuf.c:837:ptl_send_rpc()) Process leaving via out (rc=0 : 0 : 0x0)
00000100:00000001:2.0:1400265787.407193:0:2750:0:(client.c:1886:ptlrpc_check_set()) Process leaving (rc=1 : 1 : 1)
00000100:00000001:2.0:1400265787.407196:0:2750:0:(client.c:2058:ptlrpc_set_next_timeout()) Process entered
00000100:00000001:2.0:1400265787.407197:0:2750:0:(client.c:2095:ptlrpc_set_next_timeout()) Process leaving (rc=6 : 6 : 6)
00000100:00100000:2.0:1400265787.407198:0:2750:0:(client.c:2132:ptlrpc_set_wait()) set ffff8803287f55c0 going to sleep for 6 seconds
00000100:00000001:2.0:1400265787.407199:0:2750:0:(client.c:1525:ptlrpc_check_set()) Process entered
00000100:00000001:2.0:1400265787.407200:0:2750:0:(client.c:1886:ptlrpc_check_set()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:2.0:1400265787.407201:0:2750:0:(client.c:1525:ptlrpc_check_set()) Process entered
00000100:00000001:2.0:1400265787.407202:0:2750:0:(client.c:1886:ptlrpc_check_set()) Process leaving (rc=0 : 0 : 0)
00000800:00000200:1.0:1400265787.407277:0:7820:0:(socklnd_cb.c:514:ksocknal_process_transmit()) send(0) 0
00000800:00000001:1.0:1400265787.407285:0:7820:0:(socklnd_cb.c:392:ksocknal_tx_done()) Process entered
00000800:00000010:1.0:1400265787.407286:0:7820:0:(socklnd_cb.c:106:ksocknal_free_tx()) kfreed &apos;tx&apos;: 232 at ffff880310ae9180 (tot 83133420).
00000100:00000001:1.0:1400265787.407289:0:7820:0:(events.c:62:request_out_callback()) Process entered
00000100:00000200:1.0:1400265787.407293:0:7820:0:(events.c:68:request_out_callback()) @@@ type 5, status 0  req@ffff8802faff9400 x1466802977730748/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff880331322400@10.10.4.17@tcp:12/10 lens 376/4232 e 0 to 0 dl 1400265794 ref 3 fl Rpc:/2/ffffffff rc 0/-1
00000100:00000001:1.0:1400265787.407300:0:7820:0:(client.c:2313:__ptlrpc_req_finished()) Process entered
00000100:00000040:1.0:1400265787.407302:0:7820:0:(client.c:2325:__ptlrpc_req_finished()) @@@ refcount now 2  req@ffff8802faff9400 x1466802977730748/t0(0) o101-&amp;gt;lustre-MDT0000-mdc-ffff880331322400@10.10.4.17@tcp:12/10 lens 376/4232 e 0 to 0 dl 1400265794 ref 3 fl Rpc:/2/ffffffff rc 0/-1
00000100:00000001:1.0:1400265787.407307:0:7820:0:(client.c:2332:__ptlrpc_req_finished()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:1.0:1400265787.407309:0:7820:0:(events.c:87:request_out_callback()) Process leaving
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;p&gt;The server detects the resend but, since a resend is not expected for a layout lock, it handles the enqueue again, hence the LBUG (a simplified sketch of the logic at issue follows the log below):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000100:00000001:1.0:1400265787.403522:0:11347:0:(events.c:296:request_in_callback()) Process entered
00000100:00000200:1.0:1400265787.403523:0:11347:0:(events.c:306:request_in_callback()) event type 2, status 0, service mdt
00000100:00000010:1.0:1400265787.403525:0:11347:0:(client.c:410:ptlrpc_request_cache_alloc()) slab-alloced &apos;req&apos;: 976 at ffff880214c69000.
00000100:00000040:1.0:1400265787.403526:0:11347:0:(events.c:350:request_in_callback()) incoming req@ffff880214c69000 x1466802977730748 msgsize 376
00000100:00100000:1.0:1400265787.403529:0:11347:0:(events.c:352:request_in_callback()) peer: 12345-10.10.4.10@tcp
00000100:00000001:1.0:1400265787.403533:0:11347:0:(events.c:385:request_in_callback()) Process leaving
00000400:00000010:1.0:1400265787.403534:0:11347:0:(lib-lnet.h:528:lnet_msg_free()) kfreed &apos;msg&apos;: 352 at ffff880212baaa00 (tot 117245148).
00000800:00000001:1.0:1400265787.403536:0:11347:0:(socklnd_cb.c:346:ksocknal_receive()) Process entered
00000800:00000001:1.0:1400265787.403538:0:11347:0:(socklnd_cb.c:384:ksocknal_receive()) Process leaving (rc=1 : 1 : 1)
00000400:00000001:0.0:1400265787.403585:0:11385:0:(watchdog.c:416:lc_watchdog_touch()) Process entered
00000400:00000001:0.0:1400265787.403587:0:11385:0:(watchdog.c:427:lc_watchdog_touch()) Process leaving
00000100:00000001:0.0:1400265787.403590:0:11385:0:(service.c:1860:ptlrpc_server_handle_req_in()) Process entered
02000000:00000001:0.0:1400265787.403591:0:11385:0:(sec.c:2053:sptlrpc_svc_unwrap_request()) Process entered
00000100:00000001:0.0:1400265787.403592:0:11385:0:(pack_generic.c:582:__lustre_unpack_msg()) Process entered
00000100:00000001:0.0:1400265787.403593:0:11385:0:(pack_generic.c:601:__lustre_unpack_msg()) Process leaving (rc=0 : 0 : 0)
02000000:00000001:0.0:1400265787.403595:0:11385:0:(sec.c:2100:sptlrpc_svc_unwrap_request()) Process leaving (rc=0 : 0 : 0)
00000100:00100000:0.0:1400265787.403597:0:11385:0:(service.c:1939:ptlrpc_server_handle_req_in()) got req x1466802977730748
00000020:00000001:0.0:1400265787.403598:0:11385:0:(genops.c:741:class_conn2export()) Process entered
00000020:00000040:0.0:1400265787.403599:0:11385:0:(genops.c:753:class_conn2export()) looking for export cookie 0xd169abc5abfab541
00000020:00000001:0.0:1400265787.403600:0:11385:0:(lustre_handles.c:172:class_handle2object()) Process entered
00000020:00000040:0.0:1400265787.403601:0:11385:0:(genops.c:839:class_export_get()) GETting export ffff880212978800 : new refcount 13
00000020:00000001:0.0:1400265787.403603:0:11385:0:(lustre_handles.c:195:class_handle2object()) Process leaving (rc=18446612141216073728 : -131932493477888 : ffff880212978800)
00000020:00000001:0.0:1400265787.403605:0:11385:0:(genops.c:755:class_conn2export()) Process leaving (rc=18446612141216073728 : -131932493477888 : ffff880212978800)
00000100:00000001:0.0:1400265787.403607:0:11385:0:(service.c:1068:ptlrpc_update_export_timer()) Process entered
00000100:00000001:0.0:1400265787.403608:0:11385:0:(service.c:1081:ptlrpc_update_export_timer()) Process leaving
00000020:00000010:0.0:1400265787.403609:0:11385:0:(lu_object.c:1643:keys_init()) kmalloced &apos;ctx-&amp;gt;lc_value&apos;: 320 at ffff880268932c00.
02000000:00000010:0.0:1400265787.403611:0:11385:0:(capa.c:416:lu_capainfo_key_init()) kmalloced &apos;value&apos;: 128 at ffff880213336bc0.
00000020:00000010:0.0:1400265787.403612:0:11385:0:(lu_ucred.c:53:lu_ucred_key_init()) kmalloced &apos;value&apos;: 72 at ffff880212f7cac0.
00000020:00000010:0.0:1400265787.403614:0:11385:0:(tgt_main.c:187:tgt_ses_key_init()) kmalloced &apos;value&apos;: 144 at ffff880215e7a740.
00000100:00000040:0.0:1400265787.403616:0:11385:0:(service.c:1203:ptlrpc_at_set_timer()) armed mdt at +1s
00000100:00000001:0.0:1400265787.403618:0:11385:0:(service.c:1670:ptlrpc_server_request_add()) Process entered
00000100:00000001:0.0:1400265787.403618:0:11385:0:(service.c:1571:ptlrpc_server_hpreq_init()) Process entered
00000100:00000001:0.0:1400265787.403620:0:11385:0:(service.c:1622:ptlrpc_server_hpreq_init()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:0.0:1400265787.403621:0:11385:0:(service.c:1685:ptlrpc_server_request_add()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:0.0:1400265787.403625:0:11385:0:(service.c:2002:ptlrpc_server_handle_req_in()) Process leaving (rc=1 : 1 : 1)
00000400:00000001:0.0:1400265787.403627:0:11385:0:(watchdog.c:433:lc_watchdog_disable()) Process entered
00000400:00000001:0.0:1400265787.403628:0:11385:0:(watchdog.c:441:lc_watchdog_disable()) Process leaving
00000400:00000001:0.0:1400265787.403629:0:11385:0:(watchdog.c:416:lc_watchdog_touch()) Process entered
00000400:00000001:0.0:1400265787.403630:0:11385:0:(watchdog.c:427:lc_watchdog_touch()) Process leaving
00000100:00000001:0.0:1400265787.403631:0:11385:0:(service.c:2025:ptlrpc_server_handle_request()) Process entered
00000100:00000001:0.0:1400265787.403632:0:11385:0:(service.c:1803:ptlrpc_server_request_get()) Process entered
00000100:00100000:0.0:1400265787.403634:0:11385:0:(nrs_fifo.c:182:nrs_fifo_req_get()) NRS start fifo request from 12345-10.10.4.10@tcp, seq: 173
00000100:00000040:0.0:1400265787.403636:0:11385:0:(service.c:1841:ptlrpc_server_request_get()) RPC GETting export ffff880212978800 : new rpc_count 1
00000100:00000001:0.0:1400265787.403637:0:11385:0:(service.c:1843:ptlrpc_server_request_get()) Process leaving (rc=18446612141252710400 : -131932456841216 : ffff880214c69000)
00000100:00000040:0.0:1400265787.403640:0:11385:0:(lustre_net.h:3296:ptlrpc_rqphase_move()) @@@ move req &quot;New&quot; -&amp;gt; &quot;Interpret&quot;  req@ffff880214c69000 x1466802977730748/t0(0) o101-&amp;gt;a56bcdbb-ca09-9568-70fd-b3f47d606d12@10.10.4.10@tcp:0/0 lens 376/0 e 0 to 0 dl 1400265793 ref 1 fl New:/2/ffffffff rc 0/-1
00000100:00000001:0.0:1400265787.403652:0:11385:0:(service.c:1068:ptlrpc_update_export_timer()) Process entered
00000100:00000001:0.0:1400265787.403653:0:11385:0:(service.c:1081:ptlrpc_update_export_timer()) Process leaving
00000100:00100000:0.0:1400265787.403655:0:11385:0:(service.c:2086:ptlrpc_server_handle_request()) Handling RPC pname:cluuid+ref:pid:xid:nid:opc mdt00_002:a56bcdbb-ca09-9568-70fd-b3f47d606d12+13:2750:x1466802977730748:12345-10.10.4.10@tcp:101
00000100:00000200:0.0:1400265787.403657:0:11385:0:(service.c:2091:ptlrpc_server_handle_request()) got req 1466802977730748
00000020:00000001:0.0:1400265787.403659:0:11385:0:(tgt_handler.c:589:tgt_request_handle()) Process entered
00000020:00000001:0.0:1400265787.403660:0:11385:0:(tgt_handler.c:553:tgt_handler_find_check()) Process entered
00000020:00000001:0.0:1400265787.403661:0:11385:0:(tgt_handler.c:575:tgt_handler_find_check()) Process leaving (rc=18446744072111134752 : -1598416864 : ffffffffa0ba1820)
00000020:00000001:0.0:1400265787.403663:0:11385:0:(tgt_handler.c:492:tgt_handle_recovery()) Process entered
00000020:00000001:0.0:1400265787.403664:0:11385:0:(tgt_handler.c:542:tgt_handle_recovery()) Process leaving (rc=1 : 1 : 1)
00000020:00000001:0.0:1400265787.403666:0:11385:0:(tgt_handler.c:377:tgt_handle_request0()) Process entered
00000020:00000001:0.0:1400265787.403667:0:11385:0:(tgt_handler.c:307:tgt_request_preprocess()) Process entered
00000020:00000001:0.0:1400265787.403668:0:11385:0:(tgt_handler.c:362:tgt_request_preprocess()) Process leaving (rc=0 : 0 : 0)
00000020:00000001:0.0:1400265787.403670:0:11385:0:(tgt_handler.c:1220:tgt_enqueue()) Process entered
00010000:00000001:0.0:1400265787.403671:0:11385:0:(ldlm_lockd.c:1165:ldlm_handle_enqueue0()) Process entered
00010000:00010000:0.0:1400265787.403671:0:11385:0:(ldlm_lockd.c:1167:ldlm_handle_enqueue0()) ### server-side enqueue handler START
00010000:00000001:0.0:1400265787.403673:0:11385:0:(ldlm_lockd.c:1556:ldlm_request_cancel()) Process entered
00010000:00000001:0.0:1400265787.403673:0:11385:0:(ldlm_lockd.c:1560:ldlm_request_cancel()) Process leaving (rc=0 : 0 : 0)
00010000:00010000:0.0:1400265787.403677:0:11385:0:(ldlm_lockd.c:1241:ldlm_handle_enqueue0()) @@@ found existing lock cookie 0xd169abc5abfab66e  req@ffff880214c69000 x1466802977730748/t0(0) o101-&amp;gt;a56bcdbb-ca09-9568-70fd-b3f47d606d12@10.10.4.10@tcp:0/0 lens 376/0 e 0 to 0 dl 1400265793 ref 1 fl Interpret:/2/ffffffff rc 0/-1
00010000:00000001:0.0:1400265787.403683:0:11385:0:(ldlm_lockd.c:1242:ldlm_handle_enqueue0()) Process leaving via existing_lock (rc=0 : 0 : 0x0)
00010000:00000001:0.0:1400265787.403684:0:11385:0:(ldlm_lock.c:1666:ldlm_lock_enqueue()) Process entered
00000004:00000001:0.0:1400265787.403685:0:11385:0:(mdt_handler.c:3497:mdt_intent_policy()) Process entered
00000004:00000001:0.0:1400265787.403687:0:11385:0:(mdt_handler.c:3434:mdt_intent_opc()) Process entered
00000004:00000001:0.0:1400265787.403688:0:11385:0:(mdt_handler.c:2735:mdt_unpack_req_pack_rep()) Process entered
00000004:00000001:0.0:1400265787.403689:0:11385:0:(mdt_handler.c:2753:mdt_unpack_req_pack_rep()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1400265787.403690:0:11385:0:(mdt_handler.c:3247:mdt_intent_layout()) Process entered
00000004:00000001:0.0:1400265787.403691:0:11385:0:(mdt_handler.c:2181:mdt_object_find()) Process entered
00000004:00000040:0.0:1400265787.403692:0:11385:0:(mdt_handler.c:2183:mdt_object_find()) Find object for [0x200002340:0x1:0x0]
00000020:00000001:0.0:1400265787.403693:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00000020:00000001:0.0:1400265787.403695:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00000004:00000001:0.0:1400265787.403697:0:11385:0:(mdt_handler.c:2190:mdt_object_find()) Process leaving (rc=18446612141272470360 : -131932437081256 : ffff880215f41358)
00000004:00000001:0.0:1400265787.403699:0:11385:0:(mdd_object.c:238:mdd_xattr_get()) Process entered
00000004:00000001:0.0:1400265787.403700:0:11385:0:(lod_object.c:498:lod_xattr_get()) Process entered
00000004:00000001:0.0:1400265787.403704:0:11385:0:(lod_object.c:502:lod_xattr_get()) Process leaving (rc=3728 : 3728 : e90)
00000004:00000001:0.0:1400265787.403706:0:11385:0:(mdd_object.c:257:mdd_xattr_get()) Process leaving (rc=3728 : 3728 : e90)
00000004:00000001:0.0:1400265787.403707:0:11385:0:(mdt_internal.h:559:mdt_object_put()) Process entered
00000020:00000001:0.0:1400265787.403708:0:11385:0:(lustre_fid.h:707:fid_flatten32()) Process leaving (rc=36962305 : 36962305 : 2340001)
00000004:00000001:0.0:1400265787.403710:0:11385:0:(mdt_internal.h:561:mdt_object_put()) Process leaving
00000100:00000001:0.0:1400265787.403711:0:11385:0:(pack_generic.c:337:lustre_pack_reply_v2()) Process entered
02000000:00000001:0.0:1400265787.403712:0:11385:0:(sec.c:2113:sptlrpc_svc_alloc_rs()) Process entered
02000000:00000010:0.0:1400265787.403714:0:11385:0:(sec_null.c:320:null_alloc_rs()) kmalloced &apos;rs&apos;: 4336 at ffff88020f758000.
02000000:00000001:0.0:1400265787.403715:0:11385:0:(sec.c:2149:sptlrpc_svc_alloc_rs()) Process leaving (rc=0 : 0 : 0)
00000100:00000001:0.0:1400265787.403717:0:11385:0:(pack_generic.c:371:lustre_pack_reply_v2()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1400265787.403718:0:11385:0:(mdt_handler.c:3284:mdt_intent_layout()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1400265787.403719:0:11385:0:(mdt_handler.c:3483:mdt_intent_opc()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1400265787.403721:0:11385:0:(mdt_handler.c:3536:mdt_intent_policy()) Process leaving (rc=0 : 0 : 0)
00010000:00000001:0.0:1400265787.403722:0:11385:0:(ldlm_inodebits.c:188:ldlm_process_inodebits_lock()) Process entered
00010000:00040000:0.0:1400265787.403723:0:11385:0:(ldlm_inodebits.c:190:ldlm_process_inodebits_lock()) ASSERTION( lock-&amp;gt;l_granted_mode != lock-&amp;gt;l_req_mode ) failed:
00010000:00000010:0.1:1400265787.415728:0:11385:0:(ldlm_lock.c:453:lock_handle_free()) slab-freed &apos;lock&apos;: 504 at ffff8802107f1700.
00010000:00040000:0.0:1400265787.415731:0:11385:0:(ldlm_inodebits.c:190:ldlm_process_inodebits_lock()) LBUG
00000400:00000001:0.0:1400265787.503768:0:11385:0:(linux-debug.c:151:libcfs_run_lbug_upcall()) Process entered
00000400:00000001:0.0:1400265787.503770:0:11385:0:(linux-debug.c:118:libcfs_run_upcall()) Process entered
00010000:00000010:0.1:1400265787.504023:0:0:0:(ldlm_lock.c:453:lock_handle_free()) slab-freed &apos;lock&apos;: 504 at ffff880212a986c0.
00000400:00080000:3.0:1400265787.504316:0:11385:0:(linux-debug.c:142:libcfs_run_upcall()) Invoked LNET upcall /usr/lib/lustre/lnet_upcall LBUG,/var/lib/jenkins/workspace/lustre-reviews/arch/x86_64/build_type/server/distro/el6/ib_stack/inkernel/BUILD/BUILD/lustre-2.5.58/lustre/ptlrpc/../../lustre/ldlm/ldlm_inodebits.c,ldlm_process_inodebits_lock,190
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
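
&lt;p&gt;To make the failure mode concrete, here is a minimal sketch of the resend handling at issue (the helper name is hypothetical; this is not the actual patch): on a MSG_RESENT request the server should find the already-granted lock in exp_lock_hash by the client&apos;s remote handle and reuse it, rather than running the intent policy and enqueue again, which is what trips the l_granted_mode != l_req_mode assertion above.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;/* Hypothetical sketch only -- not the actual fix. Returns 1 if the
 * request is a resend for which a lock has already been granted. */
static int resend_finds_granted_lock(struct ptlrpc_request *req,
                                     const struct ldlm_request *dlmreq)
{
        struct obd_export *exp = req-&amp;gt;rq_export;
        struct lustre_handle remote_hdl = dlmreq-&amp;gt;lock_handle[0];
        struct ldlm_lock *lock;

        if (!(lustre_msg_get_flags(req-&amp;gt;rq_reqmsg) &amp;amp; MSG_RESENT))
                return 0;       /* not a resend: normal enqueue path */

        /* exp_lock_hash is keyed on l_remote_handle (cf.
         * ldlm_handle_enqueue0() above), so if the first reply was
         * lost after the lock was granted, the lock is found here. */
        lock = cfs_hash_lookup(exp-&amp;gt;exp_lock_hash, &amp;amp;remote_hdl);
        if (lock == NULL)
                return 0;       /* nothing granted yet: safe to enqueue */

        /* Reuse this lock in the reply; re-running the intent policy
         * (e.g. for a layout intent) and re-enqueueing an already
         * granted lock is what triggers the LBUG shown above. */
        LDLM_LOCK_PUT(lock);
        return 1;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;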

&lt;p&gt;I have an add-on for the current Gerrit change &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt;/#5978-&lt;span class=&quot;error&quot;&gt;&amp;#91;8,9&amp;#93;&lt;/span&gt; that handles this truncate/resend-during-layout-lock issue.&lt;br/&gt;
I have just submitted it, at &lt;a href=&quot;http://review.whamcloud.com/10378&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10378&lt;/a&gt;, as a new patch/change based on the latest &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt;/#5978-9 patch set.&lt;/p&gt;</comment>
                            <comment id="85132" author="simmonsja" created="Thu, 29 May 2014 14:52:06 +0000"  >&lt;p&gt;Here is the b2_5 version of the patch I&apos;m testing with:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/10492&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10492&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="85188" author="simmonsja" created="Thu, 29 May 2014 22:41:19 +0000"  >&lt;p&gt;Tested it first on my smallest size test bed and the two patches that I combined and back port to b2_5 did pretty well.  So I moved the test to a larger system and when I went to run my simul reproducer I now get this:&lt;/p&gt;

&lt;p&gt;May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.308755&amp;#93;&lt;/span&gt; &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.317414&amp;#93;&lt;/span&gt; mdt02_247     D 0000000000000008     0 17580      2 0x00000000&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.324711&amp;#93;&lt;/span&gt;  ffff88104d7b98c8 0000000000000046 0000000000000000 ffffffffa0e0f4ab&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.333113&amp;#93;&lt;/span&gt;  ffff8810719e5310 ffff8810719e52b8 ffff88104d7a5538 ffffffffa0e0f4ab&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.341442&amp;#93;&lt;/span&gt;  ffff88104d7b7af8 ffff88104d7b9fd8 000000000000fbc8 ffff88104d7b7af8&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.349651&amp;#93;&lt;/span&gt; Call Trace:&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.352455&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8152a6d5&amp;gt;&amp;#93;&lt;/span&gt; rwsem_down_failed_common+0x95/0x1d0&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.359226&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c0c32b&amp;gt;&amp;#93;&lt;/span&gt; ? ldiskfs_xattr_trusted_get+0x2b/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;ldiskfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.367490&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811ae017&amp;gt;&amp;#93;&lt;/span&gt; ? generic_getxattr+0x87/0x90&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.373729&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8152a866&amp;gt;&amp;#93;&lt;/span&gt; rwsem_down_read_failed+0x26/0x30&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.380272&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08b7083&amp;gt;&amp;#93;&lt;/span&gt; ? lod_xattr_get+0x153/0x420 &lt;span class=&quot;error&quot;&gt;&amp;#91;lod&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.386972&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8128eab4&amp;gt;&amp;#93;&lt;/span&gt; call_rwsem_down_read_failed+0x14/0x30&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.393901&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81529d64&amp;gt;&amp;#93;&lt;/span&gt; ? down_read+0x24/0x30&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.399595&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0dd949d&amp;gt;&amp;#93;&lt;/span&gt; mdt_object_open_lock+0x1ed/0x9d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.406680&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0dbb7ac&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_attr_get_complex+0x4ec/0x770 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.413960&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0de1ac7&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_open+0x15b7/0x2150 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.420672&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0453f76&amp;gt;&amp;#93;&lt;/span&gt; ? upcall_cache_get_entry+0x296/0x880 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.428803&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05f3700&amp;gt;&amp;#93;&lt;/span&gt; ? lu_ucred+0x20/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.435224&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0dca441&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_rec+0x41/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.441611&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0dafe63&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_internal+0x4c3/0x780 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.448515&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0db03ee&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_reint+0x1ee/0x520 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.455274&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0dadbce&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_policy+0x3ae/0x770 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.462212&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa070e511&amp;gt;&amp;#93;&lt;/span&gt; ldlm_lock_enqueue+0x361/0x8c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.469289&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0737b9f&amp;gt;&amp;#93;&lt;/span&gt; ldlm_handle_enqueue0+0x51f/0x10f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.476684&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0dae096&amp;gt;&amp;#93;&lt;/span&gt; mdt_enqueue+0x46/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.482790&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0db2c5a&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x52a/0x1470 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.489687&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0def765&amp;gt;&amp;#93;&lt;/span&gt; mds_regular_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.496399&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0767c25&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_server_handle_request+0x385/0xc00 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.504762&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04384ce&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_timer_arm+0xe/0x10 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.511492&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04493cf&amp;gt;&amp;#93;&lt;/span&gt; ? lc_watchdog_touch+0x6f/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.518772&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa075f2a9&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_wait_event+0xa9/0x2d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.525902&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810546b9&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up_common+0x59/0x90&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.532184&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0768f8d&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xaed/0x1920 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.545438&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ab56&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.550656&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.556068&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aac0&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
May 29 18:27:03 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;10809.561451&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;

&lt;p&gt;It&apos;s better in that the MDS doesn&apos;t assert any more. I also canceled the job and the system recovered, which is a good sign.&lt;/p&gt;</comment>
                            <comment id="85197" author="simmonsja" created="Thu, 29 May 2014 23:24:18 +0000"  >&lt;p&gt;IOR single shared file run is producing the below messages. Also I have a log dumps that I uploaded to&lt;/p&gt;

&lt;p&gt;ftp.whamcloud.com/uploads/LU-2827/lustre-log-LU-2827-29-05-2014&lt;/p&gt;

&lt;p&gt;May 29 18:47:39 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;12046.175064&amp;#93;&lt;/span&gt; Lustre: 17065:0:(service.c:1349:ptlrpc_at_send_early_reply()) Skipped 8 previous similar messages&lt;br/&gt;
May 29 18:47:45 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;12052.188755&amp;#93;&lt;/span&gt; Lustre: atlastds-MDT0000: Client 23d4a774-8c3b-4edc-3b98-24d112966233 (at 14@gni2) reconnecting&lt;br/&gt;
May 29 18:47:45 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;12052.199379&amp;#93;&lt;/span&gt; Lustre: Skipped 1 previous similar message&lt;br/&gt;
May 29 19:00:15 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;12801.706037&amp;#93;&lt;/span&gt; Lustre: 17829:0:(service.c:1349:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-150), not sending early reply&lt;br/&gt;
May 29 19:00:15 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;12801.706039&amp;#93;&lt;/span&gt;   req@ffff8807e74adc00 x1468567563329976/t0(0) o101-&amp;gt;73ecb9e8-d229-752a-3cfc-2036f509fb5b@50@gni2:0/0 lens 512/3512 e 0 to 0 dl 1401404420 ref 2 fl Interpret:/0/0 rc 0/0&lt;br/&gt;
May 29 19:00:15 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;12802.233175&amp;#93;&lt;/span&gt; Lustre: 17062:0:(service.c:1349:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-150), not sending early reply&lt;br/&gt;
May 29 19:00:15 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;12802.233176&amp;#93;&lt;/span&gt;   req@ffff8807e76d6400 x1468567565457596/t0(0) o101-&amp;gt;e8d3ca06-b910-df85-78e9-5653469b4bb8@81@gni2:0/0 lens 512/3512 e 0 to 0 dl 1401404420 ref 2 fl Interpret:/0/0 rc 0/0&lt;br/&gt;
May 29 19:00:15 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;12802.263339&amp;#93;&lt;/span&gt; Lustre: 17062:0:(service.c:1349:ptlrpc_at_send_early_reply()) Skipped 4 previous similar messages&lt;br/&gt;
May 29 19:00:21 atlas-tds-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;12808.519392&amp;#93;&lt;/span&gt; Lustre: atlastds-MDT0000: Client 568959a5-c11b-5ded-e1c9-1c90892a82ca (at 49@gni2) reconnecting&lt;/p&gt;</comment>
                            <comment id="85610" author="bfaccini" created="Tue, 3 Jun 2014 16:02:40 +0000"  >&lt;p&gt;I am afraid I am not able to conclude about what is going wrong, using this only MDS Lustre debug-log. Are there still similar stacks being dumped in dmesg/syslog ?&lt;/p&gt;

&lt;p&gt;Also, I did not find any eviction trace in this log.&lt;/p&gt;</comment>
                            <comment id="85618" author="simmonsja" created="Tue, 3 Jun 2014 16:45:12 +0000"  >&lt;p&gt;With the patches the evictions are gone which is good. Now its just locking up.&lt;/p&gt;</comment>
                            <comment id="85652" author="simmonsja" created="Tue, 3 Jun 2014 21:01:19 +0000"  >&lt;p&gt;I tested master with the patches as well and got the same results.&lt;/p&gt;</comment>
                            <comment id="86963" author="jlevi" created="Wed, 18 Jun 2014 19:18:37 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/5978/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/5978/&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/10378/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10378/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="87131" author="pjones" created="Fri, 20 Jun 2014 05:03:00 +0000"  >&lt;p&gt;Landed for 2.6&lt;/p&gt;</comment>
                            <comment id="87827" author="bfaccini" created="Mon, 30 Jun 2014 19:40:29 +0000"  >&lt;p&gt;Merged b2_4 backport of both #5978 and #10378 master changes for this ticket, is at &lt;a href=&quot;http://review.whamcloud.com/10902&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10902&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="87833" author="morrone" created="Mon, 30 Jun 2014 20:27:21 +0000"  >&lt;p&gt;We would really, really prefer that you guys not merge together multiple patches when backporting.  That makes sanity checking and rebasing quite a bit more complicated for us.&lt;/p&gt;</comment>
                            <comment id="87905" author="bfaccini" created="Tue, 1 Jul 2014 17:34:44 +0000"  >&lt;p&gt;The merge was done to avoid the same miss with original patch where mdt_intent_layout() had been forgotten because not present in Xyratex source-tree at this time.&lt;/p&gt;

&lt;p&gt;BTW, my b2_4 patch/back-port has some problems and needs some rework, because the MDS bombs with &quot;(ldlm_lock.c:851:ldlm_lock_decref_internal_nolock()) ASSERTION( lock-&amp;gt;l_readers &amp;gt; 0 ) failed&quot; when running the LLNL reproducer from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4584&quot; title=&quot;Lock revocation process fails consistently&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4584&quot;&gt;&lt;del&gt;LU-4584&lt;/del&gt;&lt;/a&gt; or recovery-small/test_53 in auto-tests.&lt;br/&gt;
More to come; the crash dump is under investigation ...&lt;/p&gt;</comment>
                            <comment id="96037" author="bfaccini" created="Thu, 9 Oct 2014 15:19:47 +0000"  >&lt;p&gt;I should have commented this before, sorry.&lt;br/&gt;
In fact, there are regression issues with my b2_4 back-port (&lt;a href=&quot;http://review.whamcloud.com/#/c/10902/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10902/&lt;/a&gt;) of the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt; changes. I have checked that the b2_5 version (&lt;a href=&quot;http://review.whamcloud.com/#/c/10492/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10492/&lt;/a&gt;) is OK and should land soon now.&lt;/p&gt;</comment>
                            <comment id="106601" author="apercher" created="Wed, 11 Feb 2015 09:01:02 +0000"  >&lt;p&gt;need to included fix from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5530&quot; title=&quot;MDS thread lockup witrh patched 2.5 server&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5530&quot;&gt;&lt;del&gt;LU-5530&lt;/del&gt;&lt;/a&gt; with fix from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="106648" author="bfaccini" created="Wed, 11 Feb 2015 17:03:28 +0000"  >&lt;p&gt;Antoine,&lt;br/&gt;
You are right, this must be made clear here as well. To be complete: the full list of post-&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt; related tickets/patches has been documented well and in detail by Oleg in his two comments on &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4584&quot; title=&quot;Lock revocation process fails consistently&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4584&quot;&gt;&lt;del&gt;LU-4584&lt;/del&gt;&lt;/a&gt; dated 11/Sep/14.&lt;/p&gt;</comment>
                            <comment id="109614" author="gerrit" created="Fri, 13 Mar 2015 06:59:35 +0000"  >&lt;p&gt;Jian Yu (jian.yu@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/14056&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/14056&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2827&quot; title=&quot;mdt_intent_fixup_resent() cannot find the proper lock in hash&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2827&quot;&gt;&lt;del&gt;LU-2827&lt;/del&gt;&lt;/a&gt; tests: add version check to recovery-small test 113&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 43d451f52f487780c9c83340574a125c3b6506fe&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="26108">LU-5530</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="25345">LU-5266</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="23009">LU-4584</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="25515">LU-5314</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="12260" name="hash-bug.log" size="2081395" author="panda" created="Mon, 18 Feb 2013 16:08:48 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvjbz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6847</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>