<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:29:36 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2943] LBUG mdt_reconstruct_open()) ASSERTION( (!(rc &lt; 0) || (lustre_msg_get_transno(req-&gt;rq_repmsg) == 0)) )</title>
                <link>https://jira.whamcloud.com/browse/LU-2943</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue has already been hit on lustre 2.2 (see &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1702&quot; title=&quot;LustreError: 3218:0: (mdt_open.c:1035:mdt_reconstruct_open()) LBUG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1702&quot;&gt;&lt;del&gt;LU-1702&lt;/del&gt;&lt;/a&gt;). Traces are exactly the same as for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1702&quot; title=&quot;LustreError: 3218:0: (mdt_open.c:1035:mdt_reconstruct_open()) LBUG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1702&quot;&gt;&lt;del&gt;LU-1702&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;It&apos;s been hit four consecutive times so it seems quite easy to reproduce.&lt;/p&gt;

&lt;p&gt;2013-03-06 16:05:01 LustreError: 31751:0:(mdt_open.c:1023:mdt_reconstruct_open()) ASSERTION( (!(rc &amp;lt; 0) || (lustre_msg_get_transno(req-&amp;gt;rq_repmsg) == 0)) ) failed:&lt;br/&gt;
2013-03-06 16:05:01 LustreError: 31751:0:(mdt_open.c:1023:mdt_reconstruct_open()) LBUG&lt;br/&gt;
2013-03-06 16:05:01 Pid: 31751, comm: mdt_145&lt;br/&gt;
2013-03-06 16:05:01 &lt;br/&gt;
2013-03-06 16:05:01 Call Trace:&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04a27f5&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04a2e07&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d9ed87&amp;gt;&amp;#93;&lt;/span&gt; mdt_reconstruct_open+0x7c7/0xa80 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d908c5&amp;gt;&amp;#93;&lt;/span&gt; mdt_reconstruct+0x45/0x120 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d7d099&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_internal+0x709/0x8e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d7d53d&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_reint+0x1ed/0x500 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d7bc09&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_policy+0x379/0x690 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06ca3c1&amp;gt;&amp;#93;&lt;/span&gt; ldlm_lock_enqueue+0x361/0x8f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06f03dd&amp;gt;&amp;#93;&lt;/span&gt; ldlm_handle_enqueue0+0x48d/0xf50 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d7c586&amp;gt;&amp;#93;&lt;/span&gt; mdt_enqueue+0x46/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d71762&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x932/0x1750 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d72655&amp;gt;&amp;#93;&lt;/span&gt; mdt_regular_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa071f4f6&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xd16/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810017cc&amp;gt;&amp;#93;&lt;/span&gt; ? __switch_to+0x1ac/0x320&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa071e7e0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100412a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa071e7e0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa071e7e0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:05:01  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81004120&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;



&lt;p&gt;On the crash, the file that triggered the LBUG is a file created by MPI-IO. &lt;/p&gt;

&lt;p&gt;Onsite support team made the following analysis &lt;/p&gt;

&lt;p&gt;The return status (rc) is -EREMOTE (-66) and it seems the&lt;br/&gt;
disposition mask was DISP_IT_EXECD / DISP_LOOKUP_EXECD / DISP_LOOKUP_POS&lt;br/&gt;
/ DISP_OPEN_OPEN / DISP_OPEN_LOCK. According to these information, it could be possible that, prior to the LBUG, MDS has run mdt_reint_open() having in return -EREMOTE just before the LBUG.&lt;/p&gt;

&lt;p&gt;So mdt_reint_open() would return -EREMOTE and then&lt;br/&gt;
mdt_reconstruct_open() does not take into account that, in case of an -EREMOTE &lt;br/&gt;
return, no msg transno is set ...&lt;/p&gt;

&lt;p&gt;On the attachment file you can find the struct mdt_thread_info info data &lt;br/&gt;
who made the LBUG and also the req data  (struct ptlrpc_request)&lt;br/&gt;
and lcd data (struct lsd_client_data).&lt;/p&gt;</description>
                <environment></environment>
        <key id="17821">LU-2943</key>
            <summary>LBUG mdt_reconstruct_open()) ASSERTION( (!(rc &lt; 0) || (lustre_msg_get_transno(req-&gt;rq_repmsg) == 0)) )</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="dmoreno">Diego Moreno</reporter>
                        <labels>
                            <label>mn1</label>
                    </labels>
                <created>Mon, 11 Mar 2013 11:07:57 +0000</created>
                <updated>Wed, 20 Nov 2013 09:04:06 +0000</updated>
                            <resolved>Wed, 20 Nov 2013 09:04:06 +0000</resolved>
                                    <version>Lustre 2.1.4</version>
                                                        <due></due>
                            <votes>2</votes>
                                    <watches>12</watches>
                                                                            <comments>
                            <comment id="53709" author="bfaccini" created="Mon, 11 Mar 2013 13:28:29 +0000"  >&lt;p&gt;Hello Diego,&lt;br/&gt;
Thank&apos;s for submitting this issue and these details and infos.&lt;br/&gt;
Will get back to you after reviewing all of this.&lt;br/&gt;
Best Regards.&lt;br/&gt;
Bruno.&lt;/p&gt;</comment>
                            <comment id="53759" author="green" created="Tue, 12 Mar 2013 01:12:53 +0000"  >&lt;p&gt;I think this might be somewhat related to lu-2275 that I fixed for 2.3, perhaps you should try patches from there.&lt;/p&gt;

&lt;p&gt;Additionally, how can there be EREMOTE in 2.1.4, it&apos;s not like we really have DNE there?&lt;/p&gt;

&lt;p&gt;If it&apos;s easy to reproduce, I wonder what&apos;s your reproducer method?&lt;/p&gt;</comment>
                            <comment id="53772" author="louveta" created="Tue, 12 Mar 2013 04:19:02 +0000"  >&lt;p&gt;&amp;gt; If it&apos;s easy to reproduce, I wonder what&apos;s your reproducer method?&lt;/p&gt;

&lt;p&gt;Right now we don&apos;t know. We got it several times in a few days but we were not able to identify the root cause (thousands of nodes and hundreds of jobs running ...). Since then we moved back to 2.1.3 where we don&apos;t have this problem. We will continue to investigate and try to reproduce it on a test cluster.&lt;/p&gt;</comment>
                            <comment id="53773" author="bfaccini" created="Tue, 12 Mar 2013 04:29:55 +0000"  >&lt;p&gt;Yes, I agree EREMOTE with 2.1.4 is strange! Diego, can you also provide, if any, the list of patches that may have been added on top of 2.1.4 ?&lt;br/&gt;
Thank&apos;s.&lt;/p&gt;</comment>
                            <comment id="53780" author="adegremont" created="Tue, 12 Mar 2013 06:02:36 +0000"  >&lt;p&gt;&amp;gt; If it&apos;s easy to reproduce, I wonder what&apos;s your reproducer method?&lt;/p&gt;

&lt;p&gt;In fact, it is not so easy to reproduce.&lt;br/&gt;
The problem was, as long as you do not reboot clients involved in this issue, you can restart the MDT as many times as you want, you hit this crash for sure. MDT was restarted 4 times in 1 hour timeframe and we hit this bug very quickly after each restart.&lt;/p&gt;

&lt;p&gt;I&apos;m not sure we simply have to retry again to hit this bug again. Not sure what the client was really doing to trigger this...&lt;/p&gt;</comment>
                            <comment id="53781" author="green" created="Tue, 12 Mar 2013 06:11:12 +0000"  >&lt;p&gt;Quick search for EREMOTE in b2_1 returns &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1397&quot; title=&quot;ENOENT on open()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1397&quot;&gt;&lt;del&gt;LU-1397&lt;/del&gt;&lt;/a&gt;, there&apos;s also a debug patch &lt;a href=&quot;http://review.whamcloud.com/#change,2793&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,2793&lt;/a&gt; so please take a look there too.&lt;/p&gt;</comment>
                            <comment id="53783" author="adegremont" created="Tue, 12 Mar 2013 06:26:54 +0000"  >&lt;blockquote&gt;&lt;p&gt;Yes, I agree EREMOTE wih 2.1.4 is strange!&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Yes, I had the same reaction, but the code is there, and for a long time, according to git blame (&quot;Openlock cache forward port&quot; 2008-08).&lt;br/&gt;
If you look to mdt_reint_open():1427&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-comment&quot;&gt;/* get openlock &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; is not replay and &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; a client requested it */&lt;/span&gt;
&lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!req_is_replay(req) &amp;amp;&amp;amp; create_flags &amp;amp; MDS_OPEN_LOCK) {
        ldlm_mode_t lm;

        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (create_flags &amp;amp; FMODE_WRITE)
                lm = LCK_CW;
        &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (create_flags &amp;amp; MDS_FMODE_EXEC)
                lm = LCK_PR;
        &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt;
                lm = LCK_CR;
        mdt_lock_handle_init(lhc);
        mdt_lock_reg_init(lhc, lm);
        rc = mdt_object_lock(info, child, lhc,
                             MDS_INODELOCK_LOOKUP | MDS_INODELOCK_OPEN,
                             MDT_CROSS_LOCK);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (rc) {
                result = rc;
                GOTO(out_child, result);
        } &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt; {
                result = -EREMOTE;
                mdt_set_disposition(info, ldlm_rep, DISP_OPEN_LOCK);
        }
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;You can see that under some condition result could be set to -EREMOTE and return to caller (mdt_reint_open()), with absolutely no link to DNE.&lt;/p&gt;
</comment>
                            <comment id="53787" author="patrick.valentin" created="Tue, 12 Mar 2013 08:49:41 +0000"  >&lt;p&gt;Hello Bruno,&lt;br/&gt;
here is the list of patches that were applied on top of 2.1.4.&lt;/p&gt;

&lt;p&gt;Below is the first set of patches, present on top of both 2.1.3 and 2.1.4. As said above, when the customer moved back to 2.1.3, the problem no longer appeared.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;ORNL-22 general ptlrpcd threads pool support
        From branch b2_1 (id: 71350744808a2791d6b623bfb24623052322380d)

LU-1144 ptlrpc: implement a NUMA aware ptlrpcd binding policy
        This patch is a backport on lustre 2.1 of the master branch patch.

LU-1110 fid: add full support for open-by-fid
        This patch is a backport on lustre 2.1 of the master branch patch.

LU-645  Avoid unnecessary dentry rehashing
        This patch is a backport on lustre 2.1 of the b1_8 branch patch.

LU-1331 changelog: allow changelog to extend record
        This patch is a backport on lustre 2.1 of the master branch patch.

LU-1448 llite: Prevent NULL pointer dereference on disabled OSC
        This patch is a backport on lustre 2.1 of the master branch patch.

LU-1714 lnet: Properly initialize sg_magic value
        This patch is a backport on lustre 2.1 of the master branch patch.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is the second set of patches, which is only on top of 2.1.4:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LU-1887 ptlrpc: grant shrink rpc format is special
        From branch b2_1 (id: 1de6014a19aae85ad92fc00265f9aeb86fb7f0cb)

LU-2613 mdt: update disk for fake transactions
        This patch is coming from &quot;review.whamcloud.com/#change,5143&quot;
        patch set 2, which is still in &quot;Review in Progress&quot; status.

LU-2624 ptlrpc: improve stop of ptlrpcd threads
        This patch is a backport on lustre 2.1 of the master branch patch.

LU-2683 lov: release all locks in closure to release sublock
        This patch is coming from &quot;review.whamcloud.com/#change,5208&quot; patch
        set 2, which was in &quot;Review in Progress&quot; status.
        It is now in master branch since 2013-03-04.

LU-1666 obdclass: reduce lock contention on coh_page_guard
        From branch b2_1 (id: 3d63043afdbf9842ce763bcff1efa30472ec3881)

LU-744  obdclass: revise cl_page refcount
        From branch b2_1 (id: 17f83b93481932e3476b076651ab60e1fbd15136)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;p&gt;Note: I also found &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt; ticket, which reports the same LBUG on Lustre 2.4.0.&lt;/p&gt;</comment>
                            <comment id="53815" author="bfaccini" created="Tue, 12 Mar 2013 16:35:36 +0000"  >&lt;p&gt;Thank&apos;s Patrick.&lt;/p&gt;

&lt;p&gt;Aurelien, it is true that EREMOTE is already there and since quite a long time! And this puzzle me because actually I still can not understand why you did not hit this before ?? &lt;/p&gt;

&lt;p&gt;Also, info-&amp;gt;mti_spec.sp_cr_flags are MDS_OPEN_OWNEROVERRIDE|MDS_OPEN_LOCK which should come from an NFS export ... And thus we may only have triggered a very rare open reconstruct need for a NFSd Client request ?? And like in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt; case too ??&lt;/p&gt;</comment>
                            <comment id="53886" author="adegremont" created="Wed, 13 Mar 2013 05:54:44 +0000"  >&lt;p&gt;Bruno,&lt;/p&gt;

&lt;p&gt;The fact that we hit this bug with 2.1.4 (Bull 227) and 2.4 (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt;) makes me think some recent change introduce a race condition with this code. For compatibility reason some patches from 2.4 are backported to 2.1.4. Bull backported other patches too.  But I&apos;ve look at the difference between 2.1.3 and 2.1.4 and found no candidate for such regression...&lt;/p&gt;

&lt;p&gt;Strange....&lt;/p&gt;</comment>
                            <comment id="53888" author="bfaccini" created="Wed, 13 Mar 2013 06:10:15 +0000"  >&lt;p&gt;Hello Aurelien,&lt;/p&gt;

&lt;p&gt;Yes this looked strange to me as well, and this is why I better think you hit a very rare situation that trigger and old and still present bug. Can you try to determine from the crash-dump which FS and Client/Node were involved ?? I wonder it could a Lustre FS that some of you Client should then re-export via NFS ??&lt;/p&gt;

&lt;p&gt;BTW, and according to my colleagues, seems that EREMOTE usage for open_lock feature may be avoided, so I may be back with a patch proposal.&lt;/p&gt;</comment>
                            <comment id="53891" author="adegremont" created="Wed, 13 Mar 2013 07:07:05 +0000"  >&lt;p&gt;Filesystem is our scratch fs on TERA-100, which is absolutely not re-exported by NFS.&lt;/p&gt;

&lt;p&gt;Client which seems involved was a classical compute client, as previously said, using MPI-IO (Not sure at all this is related).&lt;/p&gt;</comment>
                            <comment id="54017" author="bfaccini" created="Thu, 14 Mar 2013 10:36:19 +0000"  >&lt;p&gt;Yes, it is right, this can definitely also happen out of NFS-export scenario ...&lt;/p&gt;

&lt;p&gt;So I think that finally you experienced a somewhat rare case of an open-reconstruct/recovery scenario that triggered the bug (like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1702&quot; title=&quot;LustreError: 3218:0: (mdt_open.c:1035:mdt_reconstruct_open()) LBUG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1702&quot;&gt;&lt;del&gt;LU-1702&lt;/del&gt;&lt;/a&gt; in 2.2 and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt; in 2.4), and what cleared the situation is the Client re-boot for downgrade ...&lt;/p&gt;

&lt;p&gt;But let&apos;s wait for the currently investigated fix, as part of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt;, to come with a definitive solution. And also understand why our specific auto-tests (replay-single/test_55 and recovery-small/test_53) may not trigger.&lt;/p&gt;</comment>
                            <comment id="54740" author="sebastien.buisson" created="Mon, 25 Mar 2013 08:54:23 +0000"  >&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;Now that a fix for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt; has landed in 2.4, what do we do with this problem?&lt;br/&gt;
Can you advise?&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="54851" author="bfaccini" created="Tue, 26 Mar 2013 17:06:24 +0000"  >&lt;p&gt;Master/2.4 fix landed from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt; is at &lt;a href=&quot;http://review.whamcloud.com/#change,5694&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,5694&lt;/a&gt; , I will evaluate the b2_1 back-port.&lt;/p&gt;</comment>
                            <comment id="54994" author="apercher" created="Thu, 28 Mar 2013 10:24:39 +0000"  >&lt;p&gt;I have found the client node which is almost certainly the one that sent the failing request,&lt;br/&gt;
and in its log we can see the stack of the request :&lt;/p&gt;


&lt;p&gt;2013-03-06 16:03:04 INFO: task %%A197:17742 blocked for more than 120 seconds.&lt;br/&gt;
2013-03-06 16:03:04 &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.&lt;br/&gt;
2013-03-06 16:03:04 %%A197         D 0000000000000001     0 17742  17718 0x00000000&lt;br/&gt;
2013-03-06 16:03:04  ffff8806f08eb658 0000000000000086 0000000000000000 ffffffffa0686d24&lt;br/&gt;
2013-03-06 16:03:04  0000000000000008 ffff88080f3e3c00 ffff8806f08eb628 ffffffffa06872e7&lt;br/&gt;
2013-03-06 16:03:04  ffff88080775da70 ffff8806f08ebfd8 000000000000db00 ffff88080775da70&lt;br/&gt;
2013-03-06 16:03:04 Call Trace:&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81485ab5&amp;gt;&amp;#93;&lt;/span&gt; schedule_timeout+0x205/0x2d0&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81486a25&amp;gt;&amp;#93;&lt;/span&gt; __down+0x75/0xc0&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81081611&amp;gt;&amp;#93;&lt;/span&gt; down+0x41/0x50&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08e4ae4&amp;gt;&amp;#93;&lt;/span&gt; mdc_enqueue+0x4e4/0x13a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08e5bc0&amp;gt;&amp;#93;&lt;/span&gt; mdc_intent_lock+0x220/0x630 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b61920&amp;gt;&amp;#93;&lt;/span&gt; lmv_intent_open+0x2d0/0x10a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lmv&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b62980&amp;gt;&amp;#93;&lt;/span&gt; lmv_intent_lock+0x290/0x360 &lt;span class=&quot;error&quot;&gt;&amp;#91;lmv&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a2cbd8&amp;gt;&amp;#93;&lt;/span&gt; ll_revalidate_it+0x2b8/0x15b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a2e003&amp;gt;&amp;#93;&lt;/span&gt; ll_revalidate_nd+0x133/0x3a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81171612&amp;gt;&amp;#93;&lt;/span&gt; do_lookup+0x62/0x1e0&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81172b4a&amp;gt;&amp;#93;&lt;/span&gt; path_walk+0x6a/0xe0&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81172d1b&amp;gt;&amp;#93;&lt;/span&gt; do_path_lookup+0x5b/0xa0&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81173c4b&amp;gt;&amp;#93;&lt;/span&gt; do_filp_open+0xfb/0xd00&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811607b9&amp;gt;&amp;#93;&lt;/span&gt; do_sys_open+0x69/0x140&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811608d0&amp;gt;&amp;#93;&lt;/span&gt; sys_open+0x20/0x30&lt;br/&gt;
2013-03-06 16:03:04  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810030f2&amp;gt;&amp;#93;&lt;/span&gt; system_call_fastpath+0x16/0x1b&lt;/p&gt;

&lt;p&gt;I have also attached a file with the complete client log&lt;/p&gt;</comment>
                            <comment id="55477" author="sebastien.buisson" created="Thu, 4 Apr 2013 14:30:22 +0000"  >&lt;p&gt;Hi Bruno,&lt;/p&gt;

&lt;p&gt;We tried to backport the fix from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt; to b2_1, but we failed. Have you been able to make progress on this?&lt;br/&gt;
for the record, this issue is the most disruptive one at CEA ATM.&lt;/p&gt;

&lt;p&gt;TIA,&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="55584" author="bfaccini" created="Fri, 5 Apr 2013 07:12:02 +0000"  >&lt;p&gt;Yes, I am working on it and as you pointed it is not an easy one, will keep you updated.&lt;/p&gt;</comment>
                            <comment id="55603" author="bfaccini" created="Fri, 5 Apr 2013 13:49:02 +0000"  >&lt;p&gt;Seb,&lt;br/&gt;
Just to share and get this done as quick as possible, did you experience regressions in your porting attempts ? And if yes of what kind ?&lt;br/&gt;
I have one build in progress, will let you know on testing progress soon.&lt;/p&gt;

</comment>
                            <comment id="55680" author="bfaccini" created="Sun, 7 Apr 2013 15:34:14 +0000"  >&lt;p&gt;B2_1 port/patch &lt;a href=&quot;http://review.whamcloud.com/5954&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5954&lt;/a&gt; submitted and successfully passed auto-tests.&lt;/p&gt;</comment>
                            <comment id="55837" author="patrick.valentin" created="Tue, 9 Apr 2013 08:52:44 +0000"  >&lt;p&gt;Bruno,&lt;br/&gt;
I agree with your b2_1 patch. I tried to backport the b2_4 fix from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt;, but there is quite a lot of changes around mdt_open_by_fid_lock(), and I was afraid to miss something.&lt;/p&gt;</comment>
                            <comment id="64941" author="louveta" created="Fri, 23 Aug 2013 08:01:28 +0000"  >&lt;p&gt;Bruno, sorry to say that just after installing the patch, we got a lot of crashes on 3 large clusters.&lt;/p&gt;

&lt;p&gt;The lbug message is :&lt;br/&gt;
(mdt_handler.c:3411:mdt_intent_reint()) ASSERTION( lustre_handle_is_used(&amp;amp;lhc-&amp;gt;mlh_reg_lh) ) failed:&lt;br/&gt;
(mdt_handler.c:3411:mdt_intent_reint()) LBUG&lt;/p&gt;

&lt;p&gt;followed by this stack :&lt;br/&gt;
Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa041a7f5&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa041ae07&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c0a841&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_reint+0x4f1/0x530 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c08c09&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_policy+0x379/0x690 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06653c1&amp;gt;&amp;#93;&lt;/span&gt; ldlm_lock_enqueue+0x361/0x8f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa068b3dd&amp;gt;&amp;#93;&lt;/span&gt; ldlm_handle_enqueue0+0x48d/0xf50 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c09586&amp;gt;&amp;#93;&lt;/span&gt; mdt_enqueue+0x46/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bfe762&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x932/0x1750 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bff655&amp;gt;&amp;#93;&lt;/span&gt; mdt_regular_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06ba4f6&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xd16/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810017cc&amp;gt;&amp;#93;&lt;/span&gt; ? __switch_to+0x1ac/0x320&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06b97e0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100412a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06b97e0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06b97e0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81004120&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;

&lt;p&gt;Kernel panic - not syncing: LBUG&lt;br/&gt;
Pid: 60412, comm: mdt_440 Not tainted 2.6.32-220.23.1.bl6.Bull.28.8.x86_64 0000001&lt;br/&gt;
Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81484650&amp;gt;&amp;#93;&lt;/span&gt; ? panic+0x78/0x143&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa041ae5b&amp;gt;&amp;#93;&lt;/span&gt; ? lbug_with_loc+0x9b/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c0a841&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_intent_reint+0x4f1/0x530 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c08c09&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_intent_policy+0x379/0x690 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06653c1&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_lock_enqueue+0x361/0x8f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa068b3dd&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_handle_enqueue0+0x48d/0xf50 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c09586&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_enqueue+0x46/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bfe762&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_handle_common+0x932/0x1750 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bff655&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_regular_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06ba4f6&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0xd16/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810017cc&amp;gt;&amp;#93;&lt;/span&gt; ? __switch_to+0x1ac/0x320&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06b97e0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100412a&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0xa/0x20&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06b97e0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06b97e0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1a80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81004120&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20 &lt;/p&gt;

&lt;p&gt;That was observed on system running lustre 2.1.5 + patches&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;ORNL-22 general ptlrpcd threads pool support&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1144&quot; title=&quot;implement a NUMA aware ptlrpcd binding policy&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1144&quot;&gt;&lt;del&gt;LU-1144&lt;/del&gt;&lt;/a&gt; implement a NUMA aware ptlrpcd binding policy&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1110&quot; title=&quot;MDS Oops in osd_xattr_get() during file open by FID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1110&quot;&gt;&lt;del&gt;LU-1110&lt;/del&gt;&lt;/a&gt; MDS Oops in osd_xattr_get() during file open by FID&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2613&quot; title=&quot;opening and closing file can generate &amp;#39;unreclaimable slab&amp;#39; space&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2613&quot;&gt;&lt;del&gt;LU-2613&lt;/del&gt;&lt;/a&gt; too much unreclaimable slab space&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2624&quot; title=&quot;Stop of ptlrpcd threads is long&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2624&quot;&gt;&lt;del&gt;LU-2624&lt;/del&gt;&lt;/a&gt; ptlrpc fix thread stop&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2683&quot; title=&quot;Client deadlock in cl_lock_mutex_get&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2683&quot;&gt;&lt;del&gt;LU-2683&lt;/del&gt;&lt;/a&gt; client deadlock in cl_lock_mutex_get&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2943&quot; title=&quot;LBUG mdt_reconstruct_open()) ASSERTION( (!(rc &amp;lt; 0) || (lustre_msg_get_transno(req-&amp;gt;rq_repmsg) == 0)) )&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2943&quot;&gt;&lt;del&gt;LU-2943&lt;/del&gt;&lt;/a&gt; LBUG in mdt_reconstruct_open()&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;I agree this is not the same context as previously, but it is located exactly where the patch modifies the source code.&lt;br/&gt;
The patch was removed on the 3 affected clusters and, since then, stability is back.&lt;/p&gt;

&lt;p&gt;Alex.&lt;/p&gt;</comment>
                            <comment id="65055" author="bfaccini" created="Mon, 26 Aug 2013 10:22:31 +0000"  >&lt;p&gt;Hmm, sorry about that. Following your report, I am currently looking again at the original/master patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2927&quot; title=&quot;mdt_reconstruct_open() ASSERTION failure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2927&quot;&gt;&lt;del&gt;LU-2927&lt;/del&gt;&lt;/a&gt;. It seems that I missed a case in my first back-port, and I am testing a new version. I will keep you updated soon.&lt;/p&gt;</comment>
                            <comment id="65381" author="bfaccini" created="Thu, 29 Aug 2013 16:49:00 +0000"  >&lt;p&gt;New version/patch-set #3 of b2_1 port/patch &lt;a href=&quot;http://review.whamcloud.com/5954&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5954&lt;/a&gt; submitted and successfully passed auto-tests.&lt;/p&gt;</comment>
                            <comment id="65815" author="louveta" created="Thu, 5 Sep 2013 08:47:13 +0000"  >&lt;p&gt;What is the current status of the latest patch?&lt;/p&gt;</comment>
                            <comment id="66051" author="bfaccini" created="Mon, 9 Sep 2013 12:19:05 +0000"  >&lt;p&gt;Hello Alex,&lt;br/&gt;
Reviewers approved my patch, so it should be integrated soon, but in the meantime is there a possibility for you to temporarily integrate it and test it under production workload? I know it is not easy for you to set up since it affects the MDS side, but I have no idea how to reproduce it locally.&lt;/p&gt;</comment>
                            <comment id="66052" author="sebastien.buisson" created="Mon, 9 Sep 2013 12:43:32 +0000"  >&lt;p&gt;Hi Bruno,&lt;/p&gt;

&lt;p&gt;The patchset #3 of &lt;a href=&quot;http://review.whamcloud.com/5954&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5954&lt;/a&gt; has been rolled out at CEA for test purpose at the end of last week.&lt;br/&gt;
Hopefully we will have news soon.&lt;/p&gt;

&lt;p&gt;Cheers,&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="66053" author="louveta" created="Mon, 9 Sep 2013 12:44:52 +0000"  >&lt;p&gt;Hello Bruno,&lt;br/&gt;
The new package with your fix was delivered last Friday, when we got the approval to pick up your patch.&lt;br/&gt;
It remains for us to find a time frame to install it on the system.&lt;/p&gt;

&lt;p&gt;I&apos;ll keep you informed.&lt;/p&gt;

&lt;p&gt;Cheers,&lt;br/&gt;
Alex.&lt;/p&gt;</comment>
                            <comment id="71777" author="bfaccini" created="Mon, 18 Nov 2013 13:53:12 +0000"  >&lt;p&gt;Hello Alex and Seb, do you have any update for this ticket?&lt;br/&gt;
Bye,&lt;br/&gt;
Bruno.&lt;/p&gt;</comment>
                            <comment id="71941" author="sebastien.buisson" created="Wed, 20 Nov 2013 09:00:06 +0000"  >&lt;p&gt;Hi Bruno,&lt;/p&gt;

&lt;p&gt;Support team confirms that your fix does fix the issue.&lt;br/&gt;
Thank you!&lt;/p&gt;

&lt;p&gt;Sebastien.&lt;/p&gt;</comment>
                            <comment id="71942" author="bfaccini" created="Wed, 20 Nov 2013 09:04:06 +0000"  >&lt;p&gt;Cool, thanks for your update Seb. So I am marking this ticket as Fixed.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="21061">LU-3987</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="12437" name="lascaux2420.console.log.mdsissue" size="38222" author="apercher" created="Thu, 28 Mar 2013 10:25:21 +0000"/>
                            <attachment id="12289" name="trace_debug_mdt_reconstruct_open_assertion.txt" size="142196" author="dmoreno" created="Mon, 11 Mar 2013 11:07:57 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvkl3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7064</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>