<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:21:06 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1951] SWL: osd_handler.c:2343:osd_object_ref_del()) ASSERTION( inode-&gt;i_nlink &gt; 0 ) failed:</title>
                <link>https://jira.whamcloud.com/browse/LU-1951</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;MDS crash dumped, attempting to locate dump at this time.&lt;br/&gt;
Message from MDS:&lt;/p&gt;

&lt;p&gt;2012-09-16 11:35:57 LustreError: 5503:0:(osd_handler.c:2343:osd_object_ref_del()) ASSERTION( inode-&amp;gt;i_nlink &amp;gt; 0 ) failed:&lt;br/&gt;
2012-09-16 11:35:57 LustreError: 5503:0:(osd_handler.c:2343:osd_object_ref_del()) LBUG&lt;/p&gt;

&lt;p&gt;This looks like a possible dup of ORI-577, however that bug was supposed to have been fixed. &lt;/p&gt;

&lt;p&gt;MDS did not dump a stack, was configured with panic_on_lbug.&lt;br/&gt;
Will attempt to replicate &lt;/p&gt;</description>
                <environment>SWL Hyperion/LLNL</environment>
        <key id="15994">LU-1951</key>
            <summary>SWL: osd_handler.c:2343:osd_object_ref_del()) ASSERTION( inode-&gt;i_nlink &gt; 0 ) failed:</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="hongchao.zhang">Hongchao Zhang</assignee>
                                    <reporter username="cliffw">Cliff White</reporter>
                        <labels>
                    </labels>
                <created>Sun, 16 Sep 2012 14:47:09 +0000</created>
                <updated>Tue, 16 Apr 2013 02:38:17 +0000</updated>
                            <resolved>Tue, 16 Apr 2013 02:38:17 +0000</resolved>
                                    <version>Lustre 2.3.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>12</watches>
                                                                            <comments>
                            <comment id="44969" author="cliffw" created="Sun, 16 Sep 2012 19:14:48 +0000"  >&lt;p&gt;Okay, have vmcore here is the LBUG stack.&lt;/p&gt;

&lt;p&gt;Pid: 5503, comm: mdt01_012&lt;/p&gt;

&lt;p&gt;Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0397905&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0397f17&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa100c6a1&amp;gt;&amp;#93;&lt;/span&gt; osd_object_ref_del+0x1d1/0x210 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_ldiskfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ef109d&amp;gt;&amp;#93;&lt;/span&gt; mdo_ref_del+0xad/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ef6715&amp;gt;&amp;#93;&lt;/span&gt; mdd_unlink+0x815/0xd40 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08a61e4&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_versions+0xa4/0x120 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06a7037&amp;gt;&amp;#93;&lt;/span&gt; cml_unlink+0x97/0x200 &lt;span class=&quot;error&quot;&gt;&amp;#91;cmm&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f7b454&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_unlink+0x634/0x9e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f78151&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_rec+0x41/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f719aa&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_internal+0x50a/0x810 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f71cf4&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint+0x44/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f65802&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x922/0x1740 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f666f5&amp;gt;&amp;#93;&lt;/span&gt; mdt_regular_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08b499d&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_server_handle_request+0x40d/0xea0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08abf37&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_wait_event+0xa7/0x2a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03a85b1&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x41/0x50 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810533f3&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up+0x53/0x70&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08b5f89&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xb59/0x1860 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08b5430&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1860 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c14a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08b5430&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1860 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08b5430&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1860 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c140&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;

&lt;p&gt;Kernel panic - not syncing: LBUG&lt;/p&gt;</comment>
                            <comment id="44996" author="pjones" created="Mon, 17 Sep 2012 01:39:21 +0000"  >&lt;p&gt;Hongchao&lt;/p&gt;

&lt;p&gt;Could you please comment on this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="45031" author="hongchao.zhang" created="Mon, 17 Sep 2012 09:57:00 +0000"  >&lt;p&gt;this issue is different from ORI-577, in which is the local object(say, llog file) that triggers the assertion, and this one&lt;br/&gt;
is due to normal file/object during unlink it.&lt;/p&gt;

&lt;p&gt;this issue could be caused by LDLM problem, for MDT uses LDLM locks to protect parallel directory lock, &lt;/p&gt;

&lt;p&gt;Hi Cliff,&lt;br/&gt;
could you please test it with the debug patch &lt;a href=&quot;http://review.whamcloud.com/#change,4009&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4009&lt;/a&gt;, &lt;br/&gt;
which disables &quot;MDD_DISABLE_PDO_LOCK&quot; in mdd/mdd_internal.h, so we can test whether it is related to LDLM or not, thanks!&lt;/p&gt;</comment>
                            <comment id="45046" author="cliffw" created="Mon, 17 Sep 2012 12:10:46 +0000"  >&lt;p&gt;The patch does not build. I have a vmcore from the crash - would that be useful?&lt;/p&gt;</comment>
                            <comment id="45130" author="hongchao.zhang" created="Tue, 18 Sep 2012 07:08:22 +0000"  >&lt;p&gt;okay, thanks!&lt;br/&gt;
I&apos;m also trying to create an updated debug patch to collect some logs to help track this issue.&lt;/p&gt;</comment>
                            <comment id="45164" author="cliffw" created="Tue, 18 Sep 2012 14:59:42 +0000"  >&lt;p&gt;The core dump is on brent.whamcloud.com ~/cliffw/lu1951/vmcore-lu1951.gz&lt;/p&gt;</comment>
                            <comment id="45216" author="hongchao.zhang" created="Wed, 19 Sep 2012 08:16:10 +0000"  >&lt;p&gt;the possible patch is tracked at &lt;a href=&quot;http://review.whamcloud.com/#change,4041&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4041&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Hi Cliff, Could you please test it with this patch? thanks!&lt;/p&gt;</comment>
                            <comment id="45344" author="yong.fan" created="Fri, 21 Sep 2012 09:52:37 +0000"  >&lt;p&gt;We have found memory corruption in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1976&quot; title=&quot;SWL - mds hard crash &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1976&quot;&gt;&lt;del&gt;LU-1976&lt;/del&gt;&lt;/a&gt;, the crashed OI index node may cause accessing unknown RAM areas. Under such case, some objects/inodes may be affected also. Although there is no directly evidence to say that &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1951&quot; title=&quot;SWL: osd_handler.c:2343:osd_object_ref_del()) ASSERTION( inode-&amp;gt;i_nlink &amp;gt; 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1951&quot;&gt;&lt;del&gt;LU-1951&lt;/del&gt;&lt;/a&gt; is just the duplication of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1976&quot; title=&quot;SWL - mds hard crash &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1976&quot;&gt;&lt;del&gt;LU-1976&lt;/del&gt;&lt;/a&gt;, I tend to the duplication of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1976&quot; title=&quot;SWL - mds hard crash &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1976&quot;&gt;&lt;del&gt;LU-1976&lt;/del&gt;&lt;/a&gt;. Since the patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1976&quot; title=&quot;SWL - mds hard crash &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1976&quot;&gt;&lt;del&gt;LU-1976&lt;/del&gt;&lt;/a&gt; is ready, we can wait for the new result with such patch to check whether &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1951&quot; title=&quot;SWL: osd_handler.c:2343:osd_object_ref_del()) ASSERTION( inode-&amp;gt;i_nlink &amp;gt; 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1951&quot;&gt;&lt;del&gt;LU-1951&lt;/del&gt;&lt;/a&gt; can be reproduced or not.&lt;/p&gt;</comment>
                            <comment id="45386" author="pjones" created="Sat, 22 Sep 2012 01:18:56 +0000"  >&lt;p&gt;Dropping priority as this only occurred once in five days of testing&lt;/p&gt;</comment>
                            <comment id="45701" author="cliffw" created="Fri, 28 Sep 2012 02:02:39 +0000"  >&lt;p&gt;Looks like we still have this issue with build 24:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;ep 27 21:56:35 hyperion-rst6 kernel: LustreError: 5611:0:(osd_handler.c:2343:osd_object_ref_del()) ASSERTION( inode-&amp;gt;i_nlink &amp;gt; 0 ) failed:
Sep 27 21:56:35 hyperion-rst6 kernel: LustreError: 5611:0:(osd_handler.c:2343:osd_object_ref_del()) ASSERTION( inode-&amp;gt;i_nlink &amp;gt; 0 ) failed:
Sep 27 21:56:35 hyperion-rst6 kernel: LustreError: 5611:0:(osd_handler.c:2343:osd_object_ref_del()) LBUG
Sep 27 21:56:35 hyperion-rst6 kernel: LustreError: 5611:0:(osd_handler.c:2343:osd_object_ref_del()) LBUG
Sep 27 21:56:35 hyperion-rst6 kernel: Pid: 5611, comm: mdt00_015
Sep 27 21:56:35 hyperion-rst6 kernel:
Sep 27 21:56:35 hyperion-rst6 kernel: Call Trace:
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0392905&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0392f17&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0a946a1&amp;gt;] osd_object_ref_del+0x1d1/0x210 [osd_ldiskfs]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0efa09d&amp;gt;] mdo_ref_del+0xad/0xb0 [mdd]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0eff715&amp;gt;] mdd_unlink+0x815/0xdb0 [mdd]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa09581e4&amp;gt;] ? lustre_msg_get_versions+0xa4/0x120 [ptlrpc]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa08bd037&amp;gt;] cml_unlink+0x97/0x200 [cmm]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f83ddf&amp;gt;] ? mdt_version_get_save+0x8f/0xd0 [mdt]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f84454&amp;gt;] mdt_reint_unlink+0x634/0x9e0 [mdt]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f81151&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f7a9aa&amp;gt;] mdt_reint_internal+0x50a/0x810 [mdt]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f7acf4&amp;gt;] mdt_reint+0x44/0xe0 [mdt]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f6e802&amp;gt;] mdt_handle_common+0x922/0x1740 [mdt]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f6f6f5&amp;gt;] mdt_regular_handle+0x15/0x20 [mdt]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0966b3c&amp;gt;] ptlrpc_server_handle_request+0x41c/0xe00 [ptlrpc]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa039365e&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa03a513f&amp;gt;] ? lc_watchdog_touch+0x6f/0x180 [libcfs]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa095df37&amp;gt;] ? ptlrpc_wait_event+0xa7/0x2a0 [ptlrpc]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffff810533f3&amp;gt;] ? __wake_up+0x53/0x70
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0968111&amp;gt;] ptlrpc_main+0xbf1/0x19e0 [ptlrpc]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0967520&amp;gt;] ? ptlrpc_main+0x0/0x19e0 [ptlrpc]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0967520&amp;gt;] ? ptlrpc_main+0x0/0x19e0 [ptlrpc]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffffa0967520&amp;gt;] ? ptlrpc_main+0x0/0x19e0 [ptlrpc]
Sep 27 21:56:35 hyperion-rst6 kernel: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
Sep 27 21:56:35 hyperion-rst6 kernel:
Sep 27 21:56:35 hyperion-rst6 kernel: Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="45702" author="cliffw" created="Fri, 28 Sep 2012 02:17:22 +0000"  >&lt;p&gt;Also, some issues when recovering the MDS after the dump:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Lustre: lustre-MDT0000: used disk, loading
Lustre: 4204:0:(ldlm_lib.c:2139:target_recovery_init()) RECOVERY: service lustre-MDT0000, 91 recoverable clients, last_transno 8624944131
Lustre: 4140:0:(mgc_request.c:1534:mgc_process_recover_log()) &lt;span class=&quot;code-object&quot;&gt;Process&lt;/span&gt; recover log lustre-mdtir error -22
Lustre: lustre-MDT0000: Will be in recovery &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; at least 5:00, or until 91 clients reconnect
LustreError: 4216:0:(mds_lov.c:351:mds_lov_update_objids()) Unexpected gap in objids
LustreError: 4216:0:(mdt_recovery.c:497:mdt_txn_stop_cb()) Replay transno 8624998654 failed: rc -39
LustreError: 4216:0:(mds_lov.c:351:mds_lov_update_objids()) Unexpected gap in objids
Lustre: lustre-MDT0000: disconnecting 1 stale clients
Lustre: lustre-MDT0000: Recovery over after 1:22, of 91 clients 90 recovered and 1 was evicted.
Lustre: MDS mdd_obd-lustre-MDT0000: lustre-OST000f_UUID now active, resetting orphans
Lustre: MDS mdd_obd-lustre-MDT0000: lustre-OST001b_UUID now active, resetting orphans
Lustre: Skipped 5 previous similar messages
Lustre: Skipped 45 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="45703" author="cliffw" created="Fri, 28 Sep 2012 02:21:02 +0000"  >&lt;p&gt;/var/log/messages from evicted client&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Sep 27 23:13:24 hyperion787 kernel: Lustre: 3928:0:(client.c:1917:ptlrpc_expire_one_request()) @@@ Request  sent has timed out &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; slow reply: [sent 1348812749/real 1348812749]  req@ffff8802116cec00 x1414295663929201/t0(0) o250-&amp;gt;MGC192.168.127.6@o2ib@192.168.127.6@o2ib:26/25 lens 400/544 e 0 to 1 dl 1348812804 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
Sep 27 23:13:24 hyperion787 kernel: Lustre: 3928:0:(client.c:1917:ptlrpc_expire_one_request()) Skipped 31 previous similar messages
Sep 27 23:14:59 hyperion787 kernel: Lustre: Evicted from MGS (at MGC192.168.127.6@o2ib_0) after server handle changed from 0x5e1536afc64a7a2d to 0x9107144105607180
Sep 27 23:14:59 hyperion787 kernel: Lustre: MGC192.168.127.6@o2ib: Reactivating &lt;span class=&quot;code-keyword&quot;&gt;import&lt;/span&gt;
Sep 27 23:14:59 hyperion787 kernel: Lustre: MGC192.168.127.6@o2ib: Connection restored to MGS (at 192.168.127.6@o2ib)
Sep 27 23:16:21 hyperion787 kernel: Lustre: lustre-MDT0000-mdc-ffff88033ba61400: Connection restored to lustre-MDT0000 (at 192.168.127.6@o2ib1)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="45707" author="hongchao.zhang" created="Fri, 28 Sep 2012 03:49:45 +0000"  >&lt;p&gt;Is the patch (&lt;a href=&quot;http://review.whamcloud.com/#change,4041&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4041&lt;/a&gt;) included in this build(24)?&lt;/p&gt;</comment>
                            <comment id="45711" author="pjones" created="Fri, 28 Sep 2012 04:38:30 +0000"  >&lt;p&gt;No it is not.&lt;/p&gt;</comment>
                            <comment id="45712" author="pjones" created="Fri, 28 Sep 2012 04:43:51 +0000"  >&lt;p&gt;Hongchao&lt;/p&gt;

&lt;p&gt;If you think that it should be then please set inspectors on the patch.&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="45778" author="cliffw" created="Sat, 29 Sep 2012 23:08:10 +0000"  >&lt;p&gt;MDS crashed, panic stack:&lt;br/&gt;
2012-09-27 21:56:35 LustreError: 5611:0:(osd_handler.c:2343:osd_object_ref_del()) ASSERTION( inode-&amp;gt;i_nlink &amp;gt; 0 ) failed:&lt;br/&gt;
2012-09-27 21:56:35 LustreError: 5611:0:(osd_handler.c:2343:osd_object_ref_del()) LBUG&lt;br/&gt;
2012-09-27 21:56:35 Pid: 5611, comm: mdt00_015&lt;br/&gt;
2012-09-27 21:56:35&lt;br/&gt;
2012-09-27 21:56:35 Sep 27 21:56:35 Call Trace:&lt;br/&gt;
2012-09-27 21:56:35 hyperion-rst6 ke &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0392905&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 rnel: LustreErro &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0392f17&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 r: 5611:0:(osd_h &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a946a1&amp;gt;&amp;#93;&lt;/span&gt; osd_object_ref_del+0x1d1/0x210 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_ldiskfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 andler.c:2343:os &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0efa09d&amp;gt;&amp;#93;&lt;/span&gt; mdo_ref_del+0xad/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 d_object_ref_del &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0eff715&amp;gt;&amp;#93;&lt;/span&gt; mdd_unlink+0x815/0xdb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 ()) ASSERTION( i &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09581e4&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_versions+0xa4/0x120 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 node-&amp;gt;i_nlink &amp;gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08bd037&amp;gt;&amp;#93;&lt;/span&gt; cml_unlink+0x97/0x200 &lt;span class=&quot;error&quot;&gt;&amp;#91;cmm&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 0 ) failed:&lt;br/&gt;
2012-09-27 21:56:35 Sep &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f83ddf&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_version_get_save+0x8f/0xd0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  27 21:56:35 hyp &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f84454&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_unlink+0x634/0x9e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 erion-rst6 kerne &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f81151&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_rec+0x41/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 l: LustreError:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f7a9aa&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_internal+0x50a/0x810 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 5611:0:(osd_hand &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f7acf4&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint+0x44/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 ler.c:2343:osd_o &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f6e802&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x922/0x1740 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 bject_ref_del()) &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f6f6f5&amp;gt;&amp;#93;&lt;/span&gt; mdt_regular_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  LBUG&lt;br/&gt;
2012-09-27 21:56:35  LBUG&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0966b3c&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_server_handle_request+0x41c/0xe00 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa039365e&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_timer_arm+0xe/0x10 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03a513f&amp;gt;&amp;#93;&lt;/span&gt; ? lc_watchdog_touch+0x6f/0x180 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa095df37&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_wait_event+0xa7/0x2a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810533f3&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up+0x53/0x70&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0968111&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xbf1/0x19e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0967520&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x19e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c14a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0967520&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x19e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0967520&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x19e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c140&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;br/&gt;
2012-09-27 21:56:35&lt;br/&gt;
2012-09-27 21:56:35 Kernel panic - not syncing: LBUG&lt;br/&gt;
2012-09-27 21:56:35 Pid: 5611, comm: mdt00_015 Tainted: P           ---------------    2.6.32-279.5.1.el6_lustre.x86_64 #1&lt;br/&gt;
2012-09-27 21:56:35 Sep 27 21:56:35 Call Trace:&lt;br/&gt;
2012-09-27 21:56:35 hyperion-rst6 ke &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff814fd58a&amp;gt;&amp;#93;&lt;/span&gt; ? panic+0xa0/0x168&lt;br/&gt;
2012-09-27 21:56:35 rnel: Kernel pan &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0392f6b&amp;gt;&amp;#93;&lt;/span&gt; ? lbug_with_loc+0x9b/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 ic - not syncing &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a946a1&amp;gt;&amp;#93;&lt;/span&gt; ? osd_object_ref_del+0x1d1/0x210 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_ldiskfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35 : LBUG&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0efa09d&amp;gt;&amp;#93;&lt;/span&gt; ? mdo_ref_del+0xad/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0eff715&amp;gt;&amp;#93;&lt;/span&gt; ? mdd_unlink+0x815/0xdb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09581e4&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_versions+0xa4/0x120 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08bd037&amp;gt;&amp;#93;&lt;/span&gt; ? cml_unlink+0x97/0x200 &lt;span class=&quot;error&quot;&gt;&amp;#91;cmm&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f83ddf&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_version_get_save+0x8f/0xd0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f84454&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_reint_unlink+0x634/0x9e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f81151&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_reint_rec+0x41/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f7a9aa&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_reint_internal+0x50a/0x810 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f7acf4&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_reint+0x44/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:35  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f6e802&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_handle_common+0x922/0x1740 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f6f6f5&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_regular_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0966b3c&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_server_handle_request+0x41c/0xe00 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa039365e&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_timer_arm+0xe/0x10 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03a513f&amp;gt;&amp;#93;&lt;/span&gt; ? lc_watchdog_touch+0x6f/0x180 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa095df37&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_wait_event+0xa7/0x2a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810533f3&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up+0x53/0x70&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0968111&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0xbf1/0x19e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0967520&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x19e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c14a&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0xa/0x20&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0967520&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x19e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0967520&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x19e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2012-09-27 21:56:36  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c140&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;vmcore is on brent in ~/cliffw/lu1948/erofs&lt;/p&gt;</comment>
                            <comment id="45780" author="liang" created="Sat, 29 Sep 2012 23:47:17 +0000"  >&lt;p&gt;Check the last comment posted by Cliff on &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1948&quot; title=&quot;ldiskfs - MDS goes read-only (SWL)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1948&quot;&gt;&lt;del&gt;LU-1948&lt;/del&gt;&lt;/a&gt;, we hit this again while running SWL. &lt;/p&gt;</comment>
                            <comment id="45988" author="cliffw" created="Thu, 4 Oct 2012 11:25:27 +0000"  >&lt;p&gt;Hit this again while running SWL, backtrace:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;PID: 4891   TASK: ffff88016dedaaa0  CPU: 13  COMMAND: &lt;span class=&quot;code-quote&quot;&gt;&quot;mdt03_014&quot;&lt;/span&gt;
 #0 [ffff88016e693918] machine_kexec at ffffffff8103281b
 #1 [ffff88016e693978] crash_kexec at ffffffff810ba792
 #2 [ffff88016e693a48] panic at ffffffff814fd591
 #3 [ffff88016e693ac8] lbug_with_loc at ffffffffa0395f6b [libcfs]
 #4 [ffff88016e693ae8] osd_object_ref_del at ffffffffa0a8b6c1 [osd_ldiskfs]
 #5 [ffff88016e693b18] mdo_ref_del at ffffffffa0ef0ffd [mdd]
 #6 [ffff88016e693b28] mdd_unlink at ffffffffa0ef6675 [mdd]
 #7 [ffff88016e693be8] cml_unlink at ffffffffa06bc037 [cmm]
 #8 [ffff88016e693c28] mdt_reint_unlink at ffffffffa0f7b454 [mdt]
 #9 [ffff88016e693ca8] mdt_reint_rec at ffffffffa0f78151 [mdt]
#10 [ffff88016e693cc8] mdt_reint_internal at ffffffffa0f719aa [mdt]
#11 [ffff88016e693d18] mdt_reint at ffffffffa0f71cf4 [mdt]
#12 [ffff88016e693d38] mdt_handle_common at ffffffffa0f65802 [mdt]
#13 [ffff88016e693d88] mdt_regular_handle at ffffffffa0f666f5 [mdt]
#14 [ffff88016e693d98] ptlrpc_server_handle_request at ffffffffa095db3c [ptlrpc]
#15 [ffff88016e693e98] ptlrpc_main at ffffffffa095f111 [ptlrpc]
#16 [ffff88016e693f48] kernel_thread at ffffffff8100c14a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="46019" author="di.wang" created="Thu, 4 Oct 2012 16:17:08 +0000"  >&lt;p&gt;The lustre debug dump log. Though not much useful for this LBUG. But it seems there are some lnet error, Liang, could you please have a look? &lt;/p&gt;</comment>
                            <comment id="46040" author="liang" created="Fri, 5 Oct 2012 05:33:46 +0000"  >&lt;p&gt;I found something suspicious in mdd_rename(), but I&apos;m not expert of this, so please check this for me:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;        /* Remove old target object
         * For tobj is remote case cmm layer has processed
         * and set tobj to NULL then. So when tobj is NOT NULL,
         * it must be local one.
         */
        if (tobj &amp;amp;&amp;amp; mdd_object_exists(mdd_tobj)) {
                mdd_write_lock(env, mdd_tobj, MOR_TGT_CHILD);
                if (mdd_is_dead_obj(mdd_tobj)) {
                        mdd_write_unlock(env, mdd_tobj);
                        /* shld not be dead, something is wrong */
                        CERROR(&quot;tobj is dead, something is wrong\n&quot;);
                        rc = -EINVAL;
                        goto cleanup;
                }
                mdo_ref_del(env, mdd_tobj, handle);

                /* Remove dot reference. */
                if (is_dir)
                        mdo_ref_del(env, mdd_tobj, handle);

                la-&amp;gt;la_valid = LA_CTIME;
                rc = mdd_attr_check_set_internal(env, mdd_tobj, la, handle, 0);
                if (rc)
                        GOTO(fixup_tpobj, rc);

                rc = mdd_finish_unlink(env, mdd_tobj, ma, handle);
                mdd_write_unlock(env, mdd_tobj);
                if (rc)
                        GOTO(fixup_tpobj, rc);

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;If mdd_attr_check_set_internal() or mdd_finish_unlink() failed, it will try to revert changes by re-inserting @mdd_tobj into @mdd_tpobj again without fix refcount of @mdd_tobj:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;fixup_tpobj:
        if (rc) {
                rc2 = __mdd_index_delete(env, mdd_tpobj, tname, is_dir, handle,
                                         BYPASS_CAPA);
                if (rc2)
                        CWARN(&quot;tp obj fix error %d\n&quot;,rc2);

                if (mdd_tobj &amp;amp;&amp;amp; mdd_object_exists(mdd_tobj) &amp;amp;&amp;amp;
                    !mdd_is_dead_obj(mdd_tobj)) {
                        rc2 = __mdd_index_insert(env, mdd_tpobj,
                                         mdo2fid(mdd_tobj), tname,
                                         is_dir, handle,
                                         BYPASS_CAPA);

                        if (rc2)
                                CWARN(&quot;tp obj fix error %d\n&quot;,rc2);
                }
        }

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So if everything got reverted, refcount on target object will be wrong.&lt;br/&gt;
Is this analysis correct? &lt;/p&gt;</comment>
                            <comment id="46045" author="liang" created="Fri, 5 Oct 2012 08:29:59 +0000"  >&lt;p&gt;I&apos;ve posted another patch for this: &lt;a href=&quot;http://review.whamcloud.com/#change,4197&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4197&lt;/a&gt;&lt;br/&gt;
it should have fixed something but not sure if it can fix this bug.&lt;/p&gt;</comment>
                            <comment id="46052" author="bzzz" created="Fri, 5 Oct 2012 12:14:21 +0000"  >&lt;p&gt;Liang, probably makes sense to set add CERROR() to see whether we hit this path.&lt;/p&gt;</comment>
                            <comment id="46230" author="pjones" created="Mon, 8 Oct 2012 23:23:25 +0000"  >&lt;p&gt;Dropping priority because landed for 2.3&lt;/p&gt;</comment>
                            <comment id="46737" author="adilger" created="Thu, 18 Oct 2012 12:33:04 +0000"  >&lt;p&gt;Liang, Oleg, what about &lt;a href=&quot;http://review.whamcloud.com/4136?&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4136?&lt;/a&gt;  That patch didn&apos;t land to b2_3.  Was that intended to fix the MDS crash, or is it a secondary problem that doesn&apos;t need to be fixed for 2.3.0?&lt;/p&gt;</comment>
                            <comment id="46761" author="liang" created="Thu, 18 Oct 2012 22:53:18 +0000"  >&lt;p&gt;I think the problem fixed by review-4136 existed for long time (since 2.1), so probably it&apos;s not the reason of the crash here, but we should land it to master at least. &lt;/p&gt;</comment>
                            <comment id="47062" author="bogl" created="Mon, 29 Oct 2012 14:58:19 +0000"  >&lt;p&gt;patch for master: &lt;a href=&quot;http://review.whamcloud.com/4405&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4405&lt;/a&gt;&lt;br/&gt;
port of &lt;a href=&quot;http://review.whamcloud.com/#change,4197&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4197&lt;/a&gt;.  Context too different to just be cherry picked directly.&lt;/p&gt;</comment>
                            <comment id="56373" author="hongchao.zhang" created="Tue, 16 Apr 2013 02:38:17 +0000"  >&lt;p&gt;duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3022&quot; title=&quot;osd_handler.c:2534:osd_object_ref_del()) ASSERTION( inode-&amp;gt;i_nlink &amp;gt; 0 ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3022&quot;&gt;&lt;del&gt;LU-3022&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="11949" name="dump1.out.gz" size="686432" author="di.wang" created="Thu, 4 Oct 2012 16:17:08 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv573:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4375</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>