<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:25:55 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2522] conf-sanity test_23b timed out during mount: RPCs in &quot;Unregistering&quot; phase found</title>
                <link>https://jira.whamcloud.com/browse/LU-2522</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Andreas Dilger &amp;lt;andreas.dilger@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/f71e120a-4a59-11e2-8523-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/f71e120a-4a59-11e2-8523-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_23b failed with the following errors in the client console log:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;15:05:02:Lustre: *** cfs_fail_loc=313, val=0***&lt;br/&gt;
15:05:02:LustreError: 26376:0:(llite_lib.c:537:client_common_fill_super()) cannot start close thread: rc -4&lt;br/&gt;
15:05:23:LustreError: 25912:0:(import.c:324:ptlrpc_invalidate_import()) lustre-OST0000_UUID: rc = -110 waiting for callback (1 != 0)&lt;br/&gt;
15:05:23:LustreError: 25912:0:(import.c:350:ptlrpc_invalidate_import()) @@@ still on sending list  req@ffff880077b10000 x1421825299972112/t0(0) o8-&amp;gt;lustre-OST0000-osc-ffff8800767e7000@10.10.4.147@tcp:28/4 lens 400/264 e 0 to 0 dl 1355958298 ref 1 fl Interpret:REN/0/0 rc -5/0&lt;br/&gt;
15:05:23:LustreError: 25912:0:(import.c:366:ptlrpc_invalidate_import()) lustre-OST0000_UUID: RPCs in &quot;Unregistering&quot; phase found (0). Network is sluggish? Waiting them to error out.&lt;br/&gt;
:&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;repeats&amp;#93;&lt;/span&gt;&lt;br/&gt;
:&lt;br/&gt;
15:08:35:INFO: task mount.lustre:26376 blocked for more than 120 seconds.&lt;br/&gt;
15:08:35:mount.lustre  D 0000000000000000     0 26376  26375 0x00000080&lt;br/&gt;
15:08:35: ffff880076a8d7e8 0000000000000086 0000000050d24816 ffff880076a8d7b8&lt;br/&gt;
15:08:35: 0000000000000000 ffff88007b5e1ba0 ffffffffa0717f80 0000000000000054&lt;br/&gt;
15:08:35: ffff88007cd765f8 ffff880076a8dfd8 000000000000fb88 ffff88007cd765f8&lt;br/&gt;
15:08:35:Call Trace:&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81500605&amp;gt;&amp;#93;&lt;/span&gt; rwsem_down_failed_common+0x95/0x1d0&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81500763&amp;gt;&amp;#93;&lt;/span&gt; rwsem_down_write_failed+0x23/0x30&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8127ea53&amp;gt;&amp;#93;&lt;/span&gt; call_rwsem_down_write_failed+0x13/0x20&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff814ffc62&amp;gt;&amp;#93;&lt;/span&gt; ? down_write+0x32/0x40&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0642061&amp;gt;&amp;#93;&lt;/span&gt; client_disconnect_export+0x61/0x460 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0877335&amp;gt;&amp;#93;&lt;/span&gt; osc_disconnect+0xa5/0x2a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0504ff7&amp;gt;&amp;#93;&lt;/span&gt; class_disconnect_export_list+0x337/0x670 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05057b6&amp;gt;&amp;#93;&lt;/span&gt; class_disconnect_exports+0x116/0x2f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa052e60f&amp;gt;&amp;#93;&lt;/span&gt; class_cleanup+0x16f/0xdc0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0502fd6&amp;gt;&amp;#93;&lt;/span&gt; ? class_name2dev+0x56/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0530305&amp;gt;&amp;#93;&lt;/span&gt; class_process_config+0x10a5/0x1ca0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa07a2028&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_log_return+0x28/0x40 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0529b01&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_cfg_new+0x391/0x7e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0531079&amp;gt;&amp;#93;&lt;/span&gt; class_manual_cleanup+0x179/0x6f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a70f40&amp;gt;&amp;#93;&lt;/span&gt; ll_put_super+0xf0/0x370 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a7bf29&amp;gt;&amp;#93;&lt;/span&gt; ll_fill_super+0x7f9/0x1500 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05477cc&amp;gt;&amp;#93;&lt;/span&gt; lustre_fill_super+0x12c/0x1b00 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117d256&amp;gt;&amp;#93;&lt;/span&gt; ? set_anon_super+0x56/0x100&lt;br/&gt;
15:08:36: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117e123&amp;gt;&amp;#93;&lt;/span&gt; ? sget+0x3e3/0x480&lt;br/&gt;
15:08:36: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117d200&amp;gt;&amp;#93;&lt;/span&gt; ? set_anon_super+0x0/0x100&lt;br/&gt;
15:08:36: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05476a0&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_fill_super+0x0/0x1b00 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:36: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117e66f&amp;gt;&amp;#93;&lt;/span&gt; get_sb_nodev+0x5f/0xa0&lt;br/&gt;
15:08:36: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0532d15&amp;gt;&amp;#93;&lt;/span&gt; lustre_get_sb+0x25/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
15:08:36: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117e2cb&amp;gt;&amp;#93;&lt;/span&gt; vfs_kern_mount+0x7b/0x1b0&lt;br/&gt;
15:08:36: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117e472&amp;gt;&amp;#93;&lt;/span&gt; do_kern_mount+0x52/0x130&lt;br/&gt;
15:08:36: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8119cb42&amp;gt;&amp;#93;&lt;/span&gt; do_mount+0x2d2/0x8d0&lt;br/&gt;
15:08:36: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8119d1d0&amp;gt;&amp;#93;&lt;/span&gt; sys_mount+0x90/0xe0&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: conf-sanity 23b&lt;/p&gt;</description>
                <environment></environment>
        <key id="17018">LU-2522</key>
            <summary>conf-sanity test_23b timed out during mount: RPCs in &quot;Unregistering&quot; phase found</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Sat, 22 Dec 2012 17:07:10 +0000</created>
                <updated>Tue, 14 Dec 2021 23:00:19 +0000</updated>
                            <resolved>Tue, 14 Dec 2021 23:00:19 +0000</resolved>
                <due></due>
                            <votes>0</votes>
                                    <watches>13</watches>
                                                                            <comments>
                            <comment id="49778" author="yong.fan" created="Sat, 29 Dec 2012 06:35:40 +0000"  >&lt;p&gt;Another failure instance:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/3a25305c-514d-11e2-b56e-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/3a25305c-514d-11e2-b56e-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="58667" author="mdiep" created="Thu, 16 May 2013 16:50:23 +0000"  >&lt;p&gt;I can reliably reproduce this issue on 2.3 with the following&lt;/p&gt;

&lt;p&gt;./llmount.sh&lt;br/&gt;
run mdtest&lt;br/&gt;
./llmountcleanup.sh&lt;/p&gt;</comment>
                            <comment id="82910" author="behlendorf" created="Wed, 30 Apr 2014 19:58:49 +0000"  >&lt;p&gt;We&apos;ve been observing this for some time in production on our 2.4 clients.  I just wanted to increase the visibility of this so you&apos;re aware this issue is happening on real systems.&lt;/p&gt;</comment>
                            <comment id="84559" author="morrone" created="Tue, 20 May 2014 23:57:28 +0000"  >&lt;p&gt;As Brian mentioned we have been seeing this in production.  We would really like our clients to be able to reconnect instead of blocking forever.&lt;/p&gt;</comment>
                            <comment id="84570" author="ezell" created="Wed, 21 May 2014 02:21:29 +0000"  >&lt;p&gt;We&apos;ve also seen it in production at ORNL on Cray 2.3 clients&lt;/p&gt;</comment>
                            <comment id="84690" author="pjones" created="Thu, 22 May 2014 12:04:48 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please advise on this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="84768" author="bobijam" created="Fri, 23 May 2014 05:43:42 +0000"  >&lt;p&gt;Pushed &lt;a href=&quot;http://review.whamcloud.com/10427&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10427&lt;/a&gt; to add more request status flag in log message to help debug this issue.&lt;/p&gt;

&lt;p&gt;I think I need a network expert to help diagnose this issue. It looks like in ptlrpc_check_set(), when the OST_CONNECT request is moved to the RQ_PHASE_UNREGISTERING phase, LNetMDUnlink() (which I believe unlinks the request&apos;s reply buffer, and should somehow end up clearing request::rq_receiving_reply and ::rq_must_unlink) should ensure that ptlrpc_client_recv_or_unlink() evaluates to false for the request and moves it to RQ_PHASE_COMPLETE, while the repeated &lt;tt&gt;RPCs in &quot;Unregistering&quot; phase found&lt;/tt&gt; messages show otherwise.&lt;/p&gt;
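
&lt;p&gt;To make the expected sequence concrete, here is a minimal standalone C model of the two flags and the ptlrpc_client_recv_or_unlink() check (the struct and function bodies are simplified assumptions for illustration, not the actual Lustre code):&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#include &amp;lt;stdbool.h&amp;gt;
#include &amp;lt;stdio.h&amp;gt;

/* Simplified model of the two request flags; the real
 * struct ptlrpc_request carries many more fields. */
struct req_model {
        unsigned int rq_receiving_reply:1; /* reply MD still armed */
        unsigned int rq_must_unlink:1;     /* reply MD must be unlinked */
};

/* Models ptlrpc_client_recv_or_unlink(): while either flag is set,
 * the request cannot leave the &amp;quot;Unregistering&amp;quot; phase. */
static bool recv_or_unlink(const struct req_model *req)
{
        return req-&amp;gt;rq_receiving_reply || req-&amp;gt;rq_must_unlink;
}

int main(void)
{
        struct req_model req = { .rq_receiving_reply = 1,
                                 .rq_must_unlink = 1 };

        /* State seen in the console log: the reply event never
         * fires, so the check stays true forever. */
        if (recv_or_unlink(&amp;amp;req))
                printf(&amp;quot;RPCs in Unregistering phase found\n&amp;quot;);

        /* A successful unlink/reply callback should clear both
         * flags, letting ptlrpc_check_set() move the request to
         * RQ_PHASE_COMPLETE. */
        req.rq_receiving_reply = 0;
        req.rq_must_unlink = 0;
        if (!recv_or_unlink(&amp;amp;req))
                printf(&amp;quot;request complete\n&amp;quot;);
        return 0;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;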

&lt;p&gt;I suspect that LNetMDUnlink() does not accomplish what it should in some corner cases.&lt;/p&gt;</comment>
                            <comment id="84848" author="ashehata" created="Sun, 25 May 2014 15:35:45 +0000"  >&lt;p&gt;LNetMDUnlink unlinks the memory descriptor from the Match entry.  This frees the buffers and any possible messages using this buffer.  The only reason the MD wouldn&apos;t be unlinked immediately is if the md-&amp;gt;md_refcount &amp;gt; 0.  The md-&amp;gt;md_refcount is incremented when the MD is in use.  That happens when you&apos;re sending a message using the MD or when you are receiving a message into the MD.  Which case is this problem?&lt;/p&gt;</comment>
                            <comment id="84853" author="bobijam" created="Mon, 26 May 2014 04:44:18 +0000"  >&lt;p&gt;I think the md was occupied by sending. I&apos;m guessing the request::rq_must_unlink does not have chance to be set to 0. I&apos;ve pushed a debug patch to reveal its ::rq_must_unlink and ::rq_receiving_reply flags.&lt;/p&gt;</comment>
                            <comment id="84887" author="ashehata" created="Mon, 26 May 2014 21:59:01 +0000"  >&lt;p&gt;In ptl_send_rpc() it does the following:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        &lt;span class=&quot;code-comment&quot;&gt;/* If the MD attach succeeds, there _will_ be a reply_in callback */&lt;/span&gt;
        request-&amp;gt;rq_receiving_reply = !noreply;
        &lt;span class=&quot;code-comment&quot;&gt;/* We are responsible &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; unlinking the reply buffer */&lt;/span&gt;
        request-&amp;gt;rq_must_unlink = !noreply;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This means these flags will be 1 if a reply is expected.&lt;/p&gt;

&lt;p&gt;These flags are reset to 0 when the reply callback, reply_in_callback(), is executed.&lt;/p&gt;

&lt;p&gt;I&apos;m thinking that if the reply is delayed, these flags wouldn&apos;t be set to 0.  Would that lead to the observed behavior?&lt;/p&gt;
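
&lt;p&gt;A minimal model of that reset path, assuming only what is stated above (the real reply_in_callback() also handles early replies, unlink events, and wakeups):&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#include &amp;lt;stdio.h&amp;gt;

struct req_model {
        unsigned int rq_receiving_reply:1;
        unsigned int rq_must_unlink:1;
};

/* Models the reply event handler: when the reply arrives (or the
 * reply MD is unlinked), the flags set in ptl_send_rpc() are cleared.
 * If the reply is delayed or dropped, this never runs and both flags
 * stay 1, matching the observed hang. */
static void reply_in_callback_model(struct req_model *req)
{
        req-&amp;gt;rq_receiving_reply = 0;
        req-&amp;gt;rq_must_unlink = 0;
}

int main(void)
{
        struct req_model req = { 1, 1 }; /* state after ptl_send_rpc() */

        reply_in_callback_model(&amp;amp;req);
        printf(&amp;quot;rq_receiving_reply=%u rq_must_unlink=%u\n&amp;quot;,
               (unsigned int)req.rq_receiving_reply,
               (unsigned int)req.rq_must_unlink);
        return 0;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;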

&lt;p&gt;It would be good to try and make sure that there are no dropped messages.&lt;/p&gt;</comment>
                            <comment id="84903" author="bobijam" created="Tue, 27 May 2014 09:11:22 +0000"  >&lt;p&gt;Minh, &lt;/p&gt;

&lt;p&gt;Can you help to reproduce the issue with the debug patch &lt;a href=&quot;http://review.whamcloud.com/10427&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10427&lt;/a&gt; ?&lt;/p&gt;</comment>
                            <comment id="84975" author="ashehata" created="Tue, 27 May 2014 21:01:23 +0000"  >&lt;p&gt;I updated patch  &lt;a href=&quot;http://review.whamcloud.com/10427&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10427&lt;/a&gt; with debug code to go over the MDs in the system and see if any of them are in LNET_MD_FLAG_ZOMBIE state.  This is suppose to be a transitional state.  If an MD stays in that state for a period of time that means that the md_refcount &amp;gt; 0.  Which in turn means that there is a message being processed on that MD, but lnet_finalize() was never called on that message.  We&apos;re suspecting that the reply_in_callback() which sets the flags of interest to 0 is not being called.  As far as I could tell at the moment, this would happen if lnet_finalize() is not being called.  If lnet_finalize is not being called, we would leak an MD, which should remain in LNET_MD_FLAG_ZOMBIE state.  This patch should determine if this is the case.  If one such MD is found a CERROR is outputed.&lt;/p&gt;</comment>
                            <comment id="85930" author="mdiep" created="Thu, 5 Jun 2014 22:04:53 +0000"  >&lt;p&gt;I haven&apos;t been able to hit this issue again on master&lt;/p&gt;</comment>
                            <comment id="85944" author="pjones" created="Thu, 5 Jun 2014 23:29:55 +0000"  >&lt;p&gt;Could we try porting this path to b2_4 and seeing if it will reproduce then?&lt;/p&gt;</comment>
                            <comment id="86083" author="bobijam" created="Mon, 9 Jun 2014 12:52:46 +0000"  >&lt;p&gt;porting of the debug patch for b2_4 at &lt;a href=&quot;http://review.whamcloud.com/10653&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10653&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="86189" author="mdiep" created="Tue, 10 Jun 2014 05:25:07 +0000"  >&lt;p&gt;I also can not reproduce this on b2_4 &lt;a href=&quot;http://review.whamcloud.com/10653&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10653&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;I am working on b2_3 without any debug patch&lt;/p&gt;</comment>
                            <comment id="86259" author="mdiep" created="Tue, 10 Jun 2014 19:18:01 +0000"  >&lt;p&gt;I also can not reproduce this in b2_3&lt;/p&gt;</comment>
                            <comment id="114945" author="marc@llnl.gov" created="Mon, 11 May 2015 21:28:35 +0000"  >&lt;p&gt;We saw a burst of the debug messages on March 10th, 2015:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2015-03-09 17:16:52 Lustre: lcrash-MDT0000-mdc-ffff8804462fd400: Connection to lcrash-MDT0000 (at 10.1.1.120@o2ib9) was lost; in progress operations using this service will wait for recovery to complete
2015-03-09 17:16:52 Lustre: Skipped 3449155 previous similar messages
2015-03-09 17:18:38 Lustre: lcrash-MDT0000-mdc-ffff8804462fd400: Connection restored to lcrash-MDT0000 (at 10.1.1.120@o2ib9)
2015-03-09 17:18:38 Lustre: Skipped 3462984 previous similar messages
2015-03-09 17:19:46 LustreError: 11-0: lcrash-MDT0000-mdc-ffff8804462fd400: Communicating with 10.1.1.120@o2ib9, operation llog_origin_handle_destroy failed with -19.
2015-03-09 17:19:46 LustreError: Skipped 8882876 previous similar messages
2015-03-09 17:26:52 Lustre: lcrash-MDT0000-mdc-ffff8804462fd400: Connection to lcrash-MDT0000 (at 10.1.1.120@o2ib9) was lost; in progress operations using this service will wait for recovery to complete
2015-03-09 17:26:52 Lustre: Skipped 3461504 previous similar messages
2015-03-09 17:26:55 LNetError: 28827:0:(lib-md.c:407:LNetMDCheck()) LU-2522: md(ffff88038f4694c0)-&amp;gt;md_flags has LNET_MD_FLAG_ZOMBIE set
2015-03-09 17:28:38 Lustre: lcrash-MDT0000-mdc-ffff8804462fd400: Connection restored to lcrash-MDT0000 (at 10.1.1.120@o2ib9)
2015-03-09 17:28:38 Lustre: Skipped 3472891 previous similar messages
2015-03-09 17:29:18 LNetError: 28829:0:(lib-md.c:407:LNetMDCheck()) LU-2522: md(ffff880305a45a40)-&amp;gt;md_flags has LNET_MD_FLAG_ZOMBIE set
2015-03-09 17:29:46 LustreError: 11-0: lcrash-MDT0000-mdc-ffff8804462fd400: Communicating with 10.1.1.120@o2ib9, operation llog_origin_handle_destroy failed with -19.
2015-03-09 17:29:46 LustreError: Skipped 8232155 previous similar messages
2015-03-09 17:36:52 Lustre: lcrash-MDT0000-mdc-ffff8804462fd400: Connection to lcrash-MDT0000 (at 10.1.1.120@o2ib9) was lost; in progress operations using this service will wait for recovery to complete
2015-03-09 17:36:52 Lustre: Skipped 3484395 previous similar messages
2015-03-09 17:37:30 LNetError: 28829:0:(lib-md.c:407:LNetMDCheck()) LU-2522: md(ffff8802255738c0)-&amp;gt;md_flags has LNET_MD_FLAG_ZOMBIE set
2015-03-09 17:38:26 LNetError: 28843:0:(lib-md.c:407:LNetMDCheck()) LU-2522: md(ffff8802d20ecb40)-&amp;gt;md_flags has LNET_MD_FLAG_ZOMBIE set
2015-03-09 17:38:38 Lustre: lcrash-MDT0000-mdc-ffff8804462fd400: Connection restored to lcrash-MDT0000 (at 10.1.1.120@o2ib9)
2015-03-09 17:38:38 Lustre: Skipped 3475352 previous similar messages
2015-03-09 17:39:46 LustreError: 11-0: lcrash-MDT0000-mdc-ffff8804462fd400: Communicating with 10.1.1.120@o2ib9, operation llog_origin_handle_destroy failed with -19.
2015-03-09 17:39:46 LustreError: Skipped 8602702 previous similar messages
2015-03-09 17:46:52 Lustre: lcrash-MDT0000-mdc-ffff8804462fd400: Connection to lcrash-MDT0000 (at 10.1.1.120@o2ib9) was lost; in progress operations using this service will wait for recovery to complete
2015-03-09 17:46:52 Lustre: Skipped 3456170 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I don&apos;t know exactly which version we were running at the time.  Chris, do you happen to know?&lt;/p&gt;</comment>
                            <comment id="117044" author="bobijam" created="Mon, 1 Jun 2015 15:07:00 +0000"  >&lt;p&gt;the latest debug message does not fit this issue, this &quot;llog_origin_handle_destroy failed with -19&quot; means that MDT does not function as the client expect (MDT cannot find the llog context), and it cannot handle this client&apos;s config llog.&lt;/p&gt;</comment>
                            <comment id="119973" author="morrone" created="Tue, 30 Jun 2015 21:29:30 +0000"  >&lt;p&gt;I agree, I do not see a connection with Marc&apos;s log to this ticket.  If the only place Marc could find this happening was on lcrash (which is a developer crash-and-burn development system, so it could have occurred there for any number of reasons), then I don&apos;t think we have grounds to keep this as a top llnl  issue.  I am removing the topllnl label.&lt;/p&gt;
</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="25321">LU-5259</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="33160">LU-7434</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzve6v:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5938</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>