<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:17:24 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1524] Parent doesn&apos;t exist </title>
                <link>https://jira.whamcloud.com/browse/LU-1524</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;mds starts to report &quot;parent doesn&apos;t exist&quot; Load in the mds became very high we ending up dumping the server. Have vmcore if needed. Could be a dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1350&quot; title=&quot;Parent doesn&amp;#39;t exist!&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1350&quot;&gt;&lt;del&gt;LU-1350&lt;/del&gt;&lt;/a&gt; but with hung service thread.&lt;/p&gt;


&lt;p&gt;Lustre: 4504:0:(mdt_handler.c:888:mdt_getattr_name_lock()) header@ffff880c18139480[0x0, 1, &lt;span class=&quot;error&quot;&gt;&amp;#91;0x325698b32c:0x9:0x0&amp;#93;&lt;/span&gt; hash lru]&lt;/p&gt;
{ ^M
Lustre: 4504:0:(mdt_handler.c:888:mdt_getattr_name_lock()) ....mdt@ffff880c181394d8mdt-object@ffff880c18139480(ioepoch=0 flags=0x0, epochcount=0, writecount=0)^M
Lustre: 4504:0:(mdt_handler.c:888:mdt_getattr_name_lock()) ....cmm@ffff880b36c71d40[local]^M
Lustre: 4504:0:(mdt_handler.c:888:mdt_getattr_name_lock()) ....mdd@ffff8808ceb92a40mdd-object@ffff8808ceb92a40(open_count=0, valid=0, cltime=0, flags=0)^M
Lustre: 4504:0:(mdt_handler.c:888:mdt_getattr_name_lock()) ....osd-ldiskfs@ffff8808ceb92980osd-ldiskfs-object@ffff8808ceb92980(i:(null):0/0)[plain]^M
Lustre: 4504:0:(mdt_handler.c:888:mdt_getattr_name_lock()) }
&lt;p&gt; header@ffff880c18139480^M&lt;br/&gt;
Lustre: 4504:0:(mdt_handler.c:888:mdt_getattr_name_lock()) Parent doesn&apos;t exist!^M&lt;/p&gt;

&lt;p&gt;Lustre: 4946:0:(mdt_xattr.c:375:mdt_reint_setxattr()) client miss to set OBD_MD_FLCTIME when setxattr: [object &lt;span class=&quot;error&quot;&gt;&amp;#91;0x2f00600666:0x44:0x0&amp;#93;&lt;/span&gt;] &lt;span class=&quot;error&quot;&gt;&amp;#91;valid 68719476736&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
Lustre: Service thread pid 9153 was inactive for 200.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:^M&lt;br/&gt;
Lustre: Skipped 2 previous similar messages^M&lt;br/&gt;
Pid: 9153, comm: mdt_mdss_153^M&lt;br/&gt;
^M&lt;br/&gt;
Call Trace:^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a76785&amp;gt;&amp;#93;&lt;/span&gt; jbd2_log_wait_commit+0xc5/0x140 &lt;span class=&quot;error&quot;&gt;&amp;#91;jbd2&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8108fff0&amp;gt;&amp;#93;&lt;/span&gt; ? autoremove_wake_function+0x0/0x40^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81012c69&amp;gt;&amp;#93;&lt;/span&gt; ? read_tsc+0x9/0x20^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a6eb4b&amp;gt;&amp;#93;&lt;/span&gt; jbd2_journal_stop+0x2cb/0x320 &lt;span class=&quot;error&quot;&gt;&amp;#91;jbd2&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ac7048&amp;gt;&amp;#93;&lt;/span&gt; __ldiskfs_journal_stop+0x68/0xa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ldiskfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c498f8&amp;gt;&amp;#93;&lt;/span&gt; osd_trans_stop+0xb8/0x290 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_ldiskfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089fb06&amp;gt;&amp;#93;&lt;/span&gt; ? seq_store_write+0xc6/0x2b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089f867&amp;gt;&amp;#93;&lt;/span&gt; seq_store_trans_stop+0x57/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089fd8c&amp;gt;&amp;#93;&lt;/span&gt; seq_store_update+0x9c/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089e99a&amp;gt;&amp;#93;&lt;/span&gt; seq_server_alloc_meta+0x4aa/0x720 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0630800&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_swab_lu_seq_range+0x0/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089efc8&amp;gt;&amp;#93;&lt;/span&gt; seq_query+0x3b8/0x680 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa075e954&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_opc+0x94/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be7e85&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x8d5/0x1810 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa075e954&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_opc+0x94/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be8e35&amp;gt;&amp;#93;&lt;/span&gt; mdt_mdss_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076f42e&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xb7e/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076e8b0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c14a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076e8b0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076e8b0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c140&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20^M&lt;/p&gt;</description>
                <environment></environment>
        <key id="14930">LU-1524</key>
            <summary>Parent doesn&apos;t exist </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="6">Not a Bug</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="mhanafi">Mahmoud Hanafi</reporter>
                        <labels>
                    </labels>
                <created>Thu, 14 Jun 2012 22:03:56 +0000</created>
                <updated>Thu, 21 Mar 2013 20:22:25 +0000</updated>
                            <resolved>Thu, 21 Mar 2013 20:22:25 +0000</resolved>
                                    <version>Lustre 2.1.1</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="40621" author="pjones" created="Thu, 14 Jun 2012 22:42:21 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Is this a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1350&quot; title=&quot;Parent doesn&amp;#39;t exist!&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1350&quot;&gt;&lt;del&gt;LU-1350&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="40622" author="bobijam" created="Thu, 14 Jun 2012 22:53:23 +0000"  >&lt;p&gt;Mahmoud,&lt;/p&gt;

&lt;p&gt;What situation does this issue happen in? During MDS recovery? Or what operations made the MDS start dumping this info and hang there?&lt;/p&gt;</comment>
                            <comment id="40623" author="mhanafi" created="Thu, 14 Jun 2012 23:02:59 +0000"  >&lt;p&gt;This happened after the Parent&apos;s... message. The MDS was not in recovery, but the same MDS after recovery is again seeing a high load. &lt;/p&gt;


&lt;p&gt;Jun 14 19:26:50 nbp2-mds kernel: Lustre: nbp2-MDT0000: sending delayed replies to recovered clients&lt;br/&gt;
Jun 14 19:26:50 nbp2-mds kernel: Lustre: 4747:0:(mds_lov.c:1024:mds_notify()) MDS mdd_obd-nbp2-MDT0000: in recovery, not resetting orphans on nbp2-OST0000_UUID&lt;br/&gt;
Jun 14 19:26:50 nbp2-mds kernel: Lustre: 4747:0:(mds_lov.c:1024:mds_notify()) Skipped 14 previous similar messages&lt;br/&gt;
Jun 14 19:26:50 nbp2-mds kernel: Lustre: MDS mdd_obd-nbp2-MDT0000: nbp2-OST000a_UUID now active, resetting orphans&lt;br/&gt;
Jun 14 19:26:50 nbp2-mds kernel: Lustre: MDS mdd_obd-nbp2-MDT0000: nbp2-OST000e_UUID now active, resetting orphans&lt;br/&gt;
Jun 14 19:26:50 nbp2-mds kernel: Lustre: Skipped 1 previous similar message&lt;br/&gt;
Jun 14 19:26:50 nbp2-mds kernel: Lustre: 4747:0:(mdd_orphans.c:371:orph_key_test_and_del()) Found orphan! Delete it&lt;br/&gt;
Jun 14 19:27:13 nbp2-mds pcp-pmie&lt;span class=&quot;error&quot;&gt;&amp;#91;3585&amp;#93;&lt;/span&gt;: High 1-minute load average 332load@nbp2-mds&lt;br/&gt;
Jun 14 19:27:37 nbp2-mds kernel: LustreError: 4349:0:(lov_request.c:569:lov_update_create_set()) error creating fid 0x1 sub-object on OST idx 2/1: rc = -107&lt;br/&gt;
Jun 14 19:27:37 nbp2-mds kernel: LustreError: 4349:0:(lov_request.c:569:lov_update_create_set()) error creating fid 0x1 sub-object on OST idx 5/1: rc = -107&lt;br/&gt;
Jun 14 19:27:38 nbp2-mds kernel: LustreError: 4349:0:(lov_request.c:569:lov_update_create_set()) error creating fid 0x3 sub-object on OST idx 4/1: rc = -107&lt;br/&gt;
Jun 14 19:27:38 nbp2-mds kernel: LustreError: 4349:0:(lov_request.c:569:lov_update_create_set()) Skipped 12 previous similar messages&lt;br/&gt;
Jun 14 19:27:39 nbp2-mds kernel: LustreError: 4349:0:(lov_request.c:569:lov_update_create_set()) error creating fid 0x2 sub-object on OST idx 3/1: rc = -107&lt;/p&gt;</comment>
                            <comment id="40624" author="bobijam" created="Fri, 15 Jun 2012 00:07:28 +0000"  >&lt;p&gt;Looks like the OSTs are busy deleting orphan objects, and MDS&apos;s busy waiting for fid sequence update.&lt;/p&gt;

&lt;p&gt;Wangdi, could you help to have a look?&lt;/p&gt;</comment>
                            <comment id="40629" author="bobijam" created="Fri, 15 Jun 2012 01:50:58 +0000"  >&lt;p&gt;Did you observe that the MDS kept a high load after recovery, or did the high load only last a short time after recovery?&lt;/p&gt;

&lt;p&gt;Would you mind uploading vmcore or sysrq-trigger output, we&apos;d like to know the whole picture of this situation.&lt;/p&gt;

&lt;p&gt;(lov_request.c:569:lov_update_create_set()) error creating fid 0x1 sub-object on OST idx 2/1: rc = -107) means the 1st object hasn&apos;t created on this OST yet, is this a production system or a testing system?&lt;/p&gt;</comment>
                            <comment id="40869" author="mhanafi" created="Tue, 19 Jun 2012 14:04:40 +0000"  >&lt;p&gt;We hit this issue again. Uploading console logs with call traces. We are not able to upload the vmcore due to security.&lt;/p&gt;
</comment>
                            <comment id="40900" author="di.wang" created="Wed, 20 Jun 2012 01:12:06 +0000"  >&lt;p&gt;According to the log. there is a LBUG here.&lt;/p&gt;

&lt;p&gt;&amp;lt;0&amp;gt;LustreError: 11533:0:(lu_object.c:113:lu_object_put()) ASSERTION(cfs_list_empty(&amp;amp;top-&amp;gt;loh_lru)) failed^M&lt;br/&gt;
&amp;lt;0&amp;gt;LustreError: 11533:0:(lu_object.c:113:lu_object_put()) LBUG^M&lt;br/&gt;
&amp;lt;4&amp;gt;Pid: 11533, comm: mdt_rdpg_07^M&lt;br/&gt;
&amp;lt;4&amp;gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt;Call Trace:^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa056b855&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa056be95&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x75/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0576da6&amp;gt;&amp;#93;&lt;/span&gt; libcfs_assertion_failed+0x66/0x70 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa066e069&amp;gt;&amp;#93;&lt;/span&gt; lu_object_put+0x209/0x210 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c0f041&amp;gt;&amp;#93;&lt;/span&gt; mdt_close+0x2f1/0xac0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be7031&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_unpack_req_pack_rep+0x51/0x5d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa075fd84&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_opc+0x94/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa07620d8&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_check_version+0xc8/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be7e85&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x8d5/0x1810 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa075fd84&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_opc+0x94/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be8e75&amp;gt;&amp;#93;&lt;/span&gt; mdt_readpage_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa077085e&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xb7e/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076fce0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c14a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076fce0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076fce0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c140&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20^M&lt;br/&gt;
&amp;lt;4&amp;gt;^M&lt;br/&gt;
&amp;lt;0&amp;gt;Kernel panic - not syncing: LBUG^M&lt;br/&gt;
&amp;lt;4&amp;gt;Pid: 11533, comm: mdt_rdpg_07 Not tainted 2.6.32-220.4.1.el6.20120130.x86_64.lustre211 #1^M&lt;br/&gt;
&amp;lt;4&amp;gt;Call Trace:^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81520c76&amp;gt;&amp;#93;&lt;/span&gt; ? panic+0x78/0x164^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa056beeb&amp;gt;&amp;#93;&lt;/span&gt; ? lbug_with_loc+0xcb/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0576da6&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_assertion_failed+0x66/0x70 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa066e069&amp;gt;&amp;#93;&lt;/span&gt; ? lu_object_put+0x209/0x210 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c0f041&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_close+0x2f1/0xac0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be7031&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_unpack_req_pack_rep+0x51/0x5d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa075fd84&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_opc+0x94/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa07620d8&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_check_version+0xc8/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be7e85&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_handle_common+0x8d5/0x1810 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa075fd84&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_opc+0x94/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be8e75&amp;gt;&amp;#93;&lt;/span&gt; ? mdt_readpage_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa077085e&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0xb7e/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076fce0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c14a&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0xa/0x20^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076fce0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076fce0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c140&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20^M&lt;/p&gt;

&lt;p&gt;Seems like 1013.&lt;/p&gt;</comment>
                            <comment id="40903" author="mhanafi" created="Wed, 20 Jun 2012 01:46:58 +0000"  >&lt;p&gt;Yes the LBUG was after the system was rebooted and that is a different issue&lt;/p&gt;

&lt;p&gt;Sent from my iPhone&lt;/p&gt;

</comment>
                            <comment id="40904" author="di.wang" created="Wed, 20 Jun 2012 01:49:31 +0000"  >&lt;p&gt;Yeah, saw that. btw: the MDT is almost full?&lt;/p&gt;</comment>
                            <comment id="40909" author="di.wang" created="Wed, 20 Jun 2012 03:08:39 +0000"  >&lt;p&gt;According to the console log. it seems the journal can not be synced to disk in time. See &lt;/p&gt;

&lt;p&gt;Pid: 18717, comm: mdt_mdss_343^M&lt;br/&gt;
^M&lt;br/&gt;
Call Trace:^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a76785&amp;gt;&amp;#93;&lt;/span&gt; jbd2_log_wait_commit+0xc5/0x140 &lt;span class=&quot;error&quot;&gt;&amp;#91;jbd2&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8108fff0&amp;gt;&amp;#93;&lt;/span&gt; ? autoremove_wake_function+0x0/0x40^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a76836&amp;gt;&amp;#93;&lt;/span&gt; ? __jbd2_log_start_commit+0x36/0x40 &lt;span class=&quot;error&quot;&gt;&amp;#91;jbd2&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a6eb4b&amp;gt;&amp;#93;&lt;/span&gt; jbd2_journal_stop+0x2cb/0x320 &lt;span class=&quot;error&quot;&gt;&amp;#91;jbd2&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ac7048&amp;gt;&amp;#93;&lt;/span&gt; __ldiskfs_journal_stop+0x68/0xa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ldiskfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c4a8f8&amp;gt;&amp;#93;&lt;/span&gt; osd_trans_stop+0xb8/0x290 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_ldiskfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089fb06&amp;gt;&amp;#93;&lt;/span&gt; ? seq_store_write+0xc6/0x2b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089f867&amp;gt;&amp;#93;&lt;/span&gt; seq_store_trans_stop+0x57/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089fd8c&amp;gt;&amp;#93;&lt;/span&gt; seq_store_update+0x9c/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089e99a&amp;gt;&amp;#93;&lt;/span&gt; seq_server_alloc_meta+0x4aa/0x720 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0631800&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_swab_lu_seq_range+0x0/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa089efc8&amp;gt;&amp;#93;&lt;/span&gt; seq_query+0x3b8/0x680 &lt;span class=&quot;error&quot;&gt;&amp;#91;fid&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa075fce4&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_opc+0x94/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be7e85&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x8d5/0x1810 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa075fce4&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_msg_get_opc+0x94/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0be8e35&amp;gt;&amp;#93;&lt;/span&gt; mdt_mdss_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa07707be&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xb7e/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076fc40&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c14a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076fc40&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076fc40&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c140&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20^M&lt;br/&gt;
^M&lt;br/&gt;
LustreError: dumping log to /tmp/lustre-log.1340067239.18717^M&lt;/p&gt;

&lt;p&gt;Then all other processes just wait there to get enough journal credit to start the journal. Hmm I do not know why the journal can not be synced to disk in time. Are you using internal or external journal for your system?&lt;/p&gt;

&lt;p&gt;Actually in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-795&quot; title=&quot;per-transaction commit callbacks (ORI-107 port)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-795&quot;&gt;&lt;del&gt;LU-795&lt;/del&gt;&lt;/a&gt;, this seq code has been changed, i.e. most time seq update does not need to be sync at all. Unfortunately, that patch is only landed on 2.2, which will definitely help here. Do you have plan to upgrade your system to 2.2 anytime soon.&lt;/p&gt;

&lt;p&gt;And also we saw a lot lnet error msg here&lt;/p&gt;

&lt;p&gt;Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.59.167@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.30.1@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.59.132@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.59.130@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.29.8@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;Which usually means client is being reboot. Is that true? And how many clients in your system? Thanks.&lt;/p&gt;</comment>
                            <comment id="40929" author="mhanafi" created="Wed, 20 Jun 2012 10:51:54 +0000"  >&lt;p&gt;As for disk space we have lots free&lt;br/&gt;
Filesystem            Inodes   IUsed   IFree IUse% Mounted on&lt;br/&gt;
/dev/sda8            20096128  130179 19965949    1% /&lt;br/&gt;
tmpfs                3074827       1 3074826    1% /dev/shm&lt;br/&gt;
/dev/sda7              36720      50   36670    1% /boot&lt;br/&gt;
/dev/mapper/nbp2--vg-mgs&lt;br/&gt;
                       64000     138   63862    1% /mnt/lustre/mgs&lt;br/&gt;
/dev/mapper/nbp2--vg-mdt2&lt;br/&gt;
                     268435456 35795386 232640070   14% /mnt/lustre/nbp2-mdt&lt;br/&gt;
nbp2-mds ~ # df -k&lt;br/&gt;
Filesystem           1K-blocks      Used Available Use% Mounted on&lt;br/&gt;
/dev/sda8            484397076  11907584 448142440   3% /&lt;br/&gt;
tmpfs                 12299308         0  12299308   0% /dev/shm&lt;br/&gt;
/dev/sda7               141845     33932    100589  26% /boot&lt;br/&gt;
/dev/mapper/nbp2--vg-mgs&lt;br/&gt;
                       1007896     19888    988008   2% /mnt/lustre/mgs&lt;br/&gt;
/dev/mapper/nbp2--vg-mdt2&lt;br/&gt;
                     939453620  23505792 915947828   3% /mnt/lustre/nbp2-mdt&lt;/p&gt;

&lt;p&gt;We also noticed the slowness in the journal. We have check the RAID subsystem no issue were found. &lt;/p&gt;

&lt;p&gt;We do have a lot of clients over 10000.  &lt;/p&gt;

&lt;p&gt;We are not planning to upgrade to 2.2 but we are looking at 2.1.2. We may be able to pull-in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-795&quot; title=&quot;per-transaction commit callbacks (ORI-107 port)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-795&quot;&gt;&lt;del&gt;LU-795&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="40949" author="di.wang" created="Wed, 20 Jun 2012 16:25:28 +0000"  >&lt;p&gt;Hmm, I found some error msg &lt;/p&gt;

&lt;p&gt;LustreError: 12820:0:(llog_cat.c:298:llog_cat_add_rec()) llog_write_rec -28: lh=ffff88014c7a2ec0^M&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;-- MARK -- Mon Jun 18 09:00:00 2012&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
LustreError: 10403:0:(llog_cat.c:298:llog_cat_add_rec()) llog_write_rec -28: lh=ffff88014b0b9300^M&lt;br/&gt;
LustreError: 12833:0:(llog_cat.c:298:llog_cat_add_rec()) llog_write_rec -28: lh=ffff880240ccf140^M&lt;/p&gt;

&lt;p&gt;Ah, that is because current log handle does not have space for the record, instead of the disk space. Hmm, the error msg is a little misleading. I will correct it a bit.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-795&quot; title=&quot;per-transaction commit callbacks (ORI-107 port)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-795&quot;&gt;&lt;del&gt;LU-795&lt;/del&gt;&lt;/a&gt; definitely fix some journal sync problem here, so it should help in your case.&lt;/p&gt;

&lt;p&gt;So there are more than 10k clients, do you expect they are being reboot during the time you collecting the console log, since we see a lot &lt;/p&gt;

&lt;p&gt;Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.59.167@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.30.1@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.59.132@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.59.130@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.29.8@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;in the message you upload.&lt;/p&gt;

&lt;p&gt;Also it seems you also met a lot &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-617&quot; title=&quot;LBUG: (mdt_recovery.c:787:mdt_last_rcvd_update()) ASSERTION(req_is_replay(req)) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-617&quot;&gt;&lt;del&gt;LU-617&lt;/del&gt;&lt;/a&gt;,&lt;/p&gt;

&lt;p&gt;LustreError: 18929:0:(mdt_recovery.c:793:mdt_last_rcvd_update()) Trying to overwrite bigger transno:on-disk: 171918688025, new: 171918688024 replay: 0. see &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-617&quot; title=&quot;LBUG: (mdt_recovery.c:787:mdt_last_rcvd_update()) ASSERTION(req_is_replay(req)) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-617&quot;&gt;&lt;del&gt;LU-617&lt;/del&gt;&lt;/a&gt;.&lt;br/&gt;
...&lt;/p&gt;

&lt;p&gt;Unfortunately, the fix is also landed on 2.2. (the patch on 2.1 is just a workaround fix).&lt;/p&gt;











</comment>
                            <comment id="40950" author="mhanafi" created="Wed, 20 Jun 2012 16:40:26 +0000"  >&lt;p&gt;does kiblnd_passive_connect always mean the client is reconnecting after a reboot? Or is it just that it&apos;s reconnecting, maybe due to a fabric issue?&lt;/p&gt;</comment>
                            <comment id="40951" author="di.wang" created="Wed, 20 Jun 2012 16:43:18 +0000"  >&lt;p&gt;That is what I learned, but I am not lnet expert. Liang, could you please comment here?&lt;/p&gt;</comment>
                            <comment id="41296" author="pjones" created="Fri, 29 Jun 2012 00:41:50 +0000"  >&lt;p&gt;Added Liang as a watcher so that he can see Wang Di&apos;s question&lt;/p&gt;</comment>
                            <comment id="41298" author="liang" created="Fri, 29 Jun 2012 00:47:16 +0000"  >&lt;blockquote&gt;
&lt;p&gt;So there are more than 10k clients, do you expect they are being reboot during the time you collecting the console log, since we see a lot&lt;/p&gt;

&lt;p&gt;Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.59.167@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.30.1@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.59.132@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.59.130@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;br/&gt;
Lustre: 2056:0:(o2iblnd_cb.c:2326:kiblnd_passive_connect()) Conn stale 10.151.29.8@o2ib &lt;span class=&quot;error&quot;&gt;&amp;#91;old ver: 12, new ver: 12&amp;#93;&lt;/span&gt;&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;I think we generate these messages only for rebooted remote peers.&lt;/p&gt;</comment>
                            <comment id="54604" author="pjones" created="Thu, 21 Mar 2013 20:22:25 +0000"  >&lt;p&gt;As per NASA ok to close ticket&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="11632" name="service160.bz2" size="113187" author="mhanafi" created="Tue, 19 Jun 2012 14:03:17 +0000"/>
                            <attachment id="11631" name="service160.gz" size="154433" author="mhanafi" created="Tue, 19 Jun 2012 14:01:56 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv3db:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4045</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>