<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:38:21 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-3952] llite_nfs.c:349:ll_get_parent()) ASSERTION( body-&gt;valid &amp; (0x00000001ULL) ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-3952</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;I couldn&apos;t find this exact stack in JIRA so I&apos;m reporting it. Finally had to mount with abort_recov to stop the machine from hitting the same LBUG. kdump available upon request.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&amp;lt;0&amp;gt;LustreError: 3436:0:(llite_nfs.c:349:ll_get_parent()) ASSERTION( body-&amp;gt;valid &amp;amp; (0x00000001ULL) ) failed: 
&amp;lt;0&amp;gt;LustreError: 3436:0:(llite_nfs.c:349:ll_get_parent()) LBUG
&amp;lt;4&amp;gt;Pid: 3436, comm: nfsd
&amp;lt;4&amp;gt;
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0432895&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0432e97&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ac0ff3&amp;gt;] ll_get_parent+0x7b3/0x820 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa02f53b0&amp;gt;] reconnect_path+0x160/0x310 [exportfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa02f57aa&amp;gt;] exportfs_decode_fh+0xea/0x2bc [exportfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bfa810&amp;gt;] ? nfsd_acceptable+0x0/0x120 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa034d5b0&amp;gt;] ? cache_check+0x60/0x360 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0c00702&amp;gt;] ? exp_find_key+0x62/0xb0 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810096f0&amp;gt;] ? __switch_to+0xd0/0x320
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bfad0a&amp;gt;] fh_verify+0x32a/0x640 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0c059ac&amp;gt;] nfsd3_proc_getattr+0x6c/0xe0 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bf743e&amp;gt;] nfsd_dispatch+0xfe/0x240 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0343604&amp;gt;] svc_process_common+0x344/0x640 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81063310&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0343c40&amp;gt;] svc_process+0x110/0x160 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bf7b62&amp;gt;] nfsd+0xc2/0x160 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bf7aa0&amp;gt;] ? nfsd+0x0/0x160 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81096936&amp;gt;] kthread+0x96/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810968a0&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;4&amp;gt;
&amp;lt;0&amp;gt;Kernel panic - not syncing: LBUG
&amp;lt;4&amp;gt;Pid: 3436, comm: nfsd Tainted: G           ---------------  T 2.6.32-358.6.2.el6_lustre.g230b174.x86_64 #1
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8150d878&amp;gt;] ? panic+0xa7/0x16f
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0432eeb&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ac0ff3&amp;gt;] ? ll_get_parent+0x7b3/0x820 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa02f53b0&amp;gt;] ? reconnect_path+0x160/0x310 [exportfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa02f57aa&amp;gt;] ? exportfs_decode_fh+0xea/0x2bc [exportfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bfa810&amp;gt;] ? nfsd_acceptable+0x0/0x120 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa034d5b0&amp;gt;] ? cache_check+0x60/0x360 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0c00702&amp;gt;] ? exp_find_key+0x62/0xb0 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810096f0&amp;gt;] ? __switch_to+0xd0/0x320
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bfad0a&amp;gt;] ? fh_verify+0x32a/0x640 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0c059ac&amp;gt;] ? nfsd3_proc_getattr+0x6c/0xe0 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bf743e&amp;gt;] ? nfsd_dispatch+0xfe/0x240 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0343604&amp;gt;] ? svc_process_common+0x344/0x640 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81063310&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0343c40&amp;gt;] ? svc_process+0x110/0x160 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bf7b62&amp;gt;] ? nfsd+0xc2/0x160 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0bf7aa0&amp;gt;] ? nfsd+0x0/0x160 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81096936&amp;gt;] ? kthread+0x96/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c0ca&amp;gt;] ? child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810968a0&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Lustre client: 2.6.32-358.6.2.el6_lustre.g230b174, exporting via NFS.</environment>
        <key id="20964">LU-3952</key>
            <summary>llite_nfs.c:349:ll_get_parent()) ASSERTION( body-&gt;valid &amp; (0x00000001ULL) ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="daire">Daire Byrne</reporter>
                        <labels>
                            <label>mn4</label>
                    </labels>
                <created>Mon, 16 Sep 2013 12:23:08 +0000</created>
                <updated>Tue, 29 Jul 2014 09:56:41 +0000</updated>
                            <resolved>Sat, 4 Jan 2014 14:58:57 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                    <fixVersion>Lustre 2.5.1</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>18</watches>
                                                                            <comments>
                            <comment id="66734" author="adilger" created="Mon, 16 Sep 2013 16:14:16 +0000"  >&lt;p&gt;Daire, it appears from the stack that this is on a client which is re-exporting the Lustre filesystem via NFS?&lt;/p&gt;</comment>
                            <comment id="66736" author="daire" created="Mon, 16 Sep 2013 16:18:03 +0000"  >&lt;p&gt;Yes that is correct. Although the NFS access would have been very minimal at the time.&lt;/p&gt;</comment>
                            <comment id="68147" author="daire" created="Wed, 2 Oct 2013 14:30:33 +0000"  >&lt;p&gt;Another instance. Again we have the kdump vmcore if that is useful. Minimal to zero NFS access.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&amp;lt;3&amp;gt;LustreError: 4858:0:(llite_nfs.c:107:search_inode_for_lustre()) can&apos;t get object attrs, fid [0x200007260:0x19409:0x0], rc -2
&amp;lt;0&amp;gt;LustreError: 4855:0:(llite_nfs.c:349:ll_get_parent()) ASSERTION( body-&amp;gt;valid &amp;amp; (0x00000001ULL) ) failed: 
&amp;lt;0&amp;gt;LustreError: 4855:0:(llite_nfs.c:349:ll_get_parent()) LBUG
&amp;lt;4&amp;gt;Pid: 4855, comm: nfsd
&amp;lt;4&amp;gt;
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0486895&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0486e97&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b08ff3&amp;gt;] ll_get_parent+0x7b3/0x820 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa01223b0&amp;gt;] reconnect_path+0x160/0x310 [exportfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa01227aa&amp;gt;] exportfs_decode_fh+0xea/0x2bc [exportfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043d810&amp;gt;] ? nfsd_acceptable+0x0/0x120 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa034d5b0&amp;gt;] ? cache_check+0x60/0x360 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0443702&amp;gt;] ? exp_find_key+0x62/0xb0 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810096f0&amp;gt;] ? __switch_to+0xd0/0x320
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043dd0a&amp;gt;] fh_verify+0x32a/0x640 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa04489ac&amp;gt;] nfsd3_proc_getattr+0x6c/0xe0 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043a43e&amp;gt;] nfsd_dispatch+0xfe/0x240 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0343604&amp;gt;] svc_process_common+0x344/0x640 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81063310&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0343c40&amp;gt;] svc_process+0x110/0x160 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043ab62&amp;gt;] nfsd+0xc2/0x160 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043aaa0&amp;gt;] ? nfsd+0x0/0x160 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81096936&amp;gt;] kthread+0x96/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810968a0&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;4&amp;gt;
&amp;lt;0&amp;gt;Kernel panic - not syncing: LBUG
&amp;lt;4&amp;gt;Pid: 4855, comm: nfsd Tainted: G           ---------------  T 2.6.32-358.6.2.el6_lustre.g230b174.x86_64 #1
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8150d878&amp;gt;] ? panic+0xa7/0x16f
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0486eeb&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b08ff3&amp;gt;] ? ll_get_parent+0x7b3/0x820 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa01223b0&amp;gt;] ? reconnect_path+0x160/0x310 [exportfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa01227aa&amp;gt;] ? exportfs_decode_fh+0xea/0x2bc [exportfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043d810&amp;gt;] ? nfsd_acceptable+0x0/0x120 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa034d5b0&amp;gt;] ? cache_check+0x60/0x360 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0443702&amp;gt;] ? exp_find_key+0x62/0xb0 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810096f0&amp;gt;] ? __switch_to+0xd0/0x320
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043dd0a&amp;gt;] ? fh_verify+0x32a/0x640 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa04489ac&amp;gt;] ? nfsd3_proc_getattr+0x6c/0xe0 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043a43e&amp;gt;] ? nfsd_dispatch+0xfe/0x240 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0343604&amp;gt;] ? svc_process_common+0x344/0x640 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81063310&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0343c40&amp;gt;] ? svc_process+0x110/0x160 [sunrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043ab62&amp;gt;] ? nfsd+0xc2/0x160 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa043aaa0&amp;gt;] ? nfsd+0x0/0x160 [nfsd]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81096936&amp;gt;] ? kthread+0x96/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c0ca&amp;gt;] ? child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810968a0&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="68191" author="adilger" created="Wed, 2 Oct 2013 19:52:53 +0000"  >&lt;p&gt;Looks like this is being hit in the case where an NFS file handle is being accessed for a disconnected directory dentry, and NFS wants to connect it to the dcache (in reconnect_path()).  In ll_get_parent() the client sends an RPC to the MDS to do a lookup for &quot;..&quot; in the directory, but this doesn&apos;t return the expected result to the client.  I can see how this might be a rare case, since it requires long-running NFS client processes that are doing a lookup on a directory that is not fully connected in the Lustre client cache (e.g. after the Lustre client remounted the filesystem).&lt;/p&gt;

&lt;p&gt;Some strange things I found when looking in the code:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;the Lustre client does not initialize op_valid in ll_get_parent(), which means mdc_getattr_name() calls mdc_pack_body() and sets body-&amp;gt;valid = 0&lt;/li&gt;
	&lt;li&gt;in mdt_lookup_raw() if body-&amp;gt;valid does not have OBD_MD_FLID set the whole function is skipped&lt;/li&gt;
	&lt;li&gt;the client is LASSERTing that the MDS_GETATTR_NAME reply has OBD_MD_FLID set (which should &lt;em&gt;always&lt;/em&gt; be set for the getattr reply, we can&apos;t possibly get attributes for an object without knowing its FID)&lt;/li&gt;
	&lt;li&gt;IOC_MDC_LOOKUP sets OBD_MD_FLID itself before calling mdc_getattr_name()&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;I suspect that this is an &quot;always fails&quot; case, but we don&apos;t have a proper test for it, and it only happens in rare cases when NFS exporting the filesystem from a client.  Daire, could you confirm/deny if this happens after the NFS-exporting client has reconnected to the MDS, either after it was restarted or was evicted by the MDS?&lt;/p&gt;

&lt;p&gt;It has been a long time since I looked at the code in question, and the server-side code was reworked in the 2.4 release, so I don&apos;t know for sure that the above actually is the source of the defect, but it is a starting point.&lt;/p&gt;</comment>
                            <comment id="68377" author="daire" created="Fri, 4 Oct 2013 16:06:04 +0000"  >&lt;p&gt;Looking through the logs (client and MDS) it doesn&apos;t look like the client was evicted.&lt;/p&gt;

&lt;p&gt;The only thing accessing the filesystem through NFS atm is a script which runs every 10 minutes which simply stats a bunch of directories. These directories are often deleted by another Lustre client so it is possible a directory is removed at the same time.&lt;/p&gt;</comment>
                            <comment id="71196" author="bobijam" created="Sat, 9 Nov 2013 07:30:13 +0000"  >&lt;p&gt;patch tracking at &lt;a href=&quot;http://review.whamcloud.com/8459&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8459&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="71891" author="daire" created="Tue, 19 Nov 2013 15:24:36 +0000"  >&lt;p&gt;We have applied the patch to an NFS gateway that was consistently crashing 1-2 times a week. I will report back in a week or so.&lt;/p&gt;</comment>
                            <comment id="71947" author="daire" created="Wed, 20 Nov 2013 12:43:33 +0000"  >&lt;p&gt;Well the addition of that patch (+ op_data-&amp;gt;op_valid = OBD_MD_FLID&lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/wink.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt; alone hasn&apos;t resolved the issue. I just got the same ASSERTION/LBUG. Even with abort_recov on the mountpoint the client/NFS gateway kept crashing over and over. vmcores available on request.&lt;/p&gt;</comment>
                            <comment id="72114" author="bobijam" created="Fri, 22 Nov 2013 09:21:52 +0000"  >&lt;p&gt;yes, please upload the client vmcore (plus supporting files. i.e system.map and vmlinux) as well as MDS lustre logs.&lt;/p&gt;</comment>
                            <comment id="72139" author="daire" created="Fri, 22 Nov 2013 15:28:23 +0000"  >&lt;p&gt;kdump vmcore:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://dl.dropboxusercontent.com/u/24821368/llite_nfs_vmcore.tar.bz2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://dl.dropboxusercontent.com/u/24821368/llite_nfs_vmcore.tar.bz2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The system.map and vmlinuz are the ones from the Whamcloud 2.4.1 rpms. I only recompiled the lustre-modules package with the patch and installed them. Unfortunately I don&apos;t have any MDS debug logs to go with the client crash. As far as the MDS syslog is concerned the client just timed out&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Nov 20 12:06:59 bmds1 kernel: Lustre: 10702:0:(client.c:1868:ptlrpc_expire_one_request()) @@@ Request sent has timed out &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; sent delay: [sent 1384949212/real 0]  req@ffff8823c5fbc000 x1450793259087600/t0(0) o104-&amp;gt;bravo-MDT0000@10.21.20.161@tcp:15/16 lens 296/224 e 0 to 1 dl 1384949219 ref 2 fl Rpc:XN/0/ffffffff rc 0/-1
Nov 20 12:06:59 bmds1 kernel: Lustre: 10702:0:(client.c:1868:ptlrpc_expire_one_request()) Skipped 52 previous similar messages
Nov 20 12:06:59 bmds1 kernel: LustreError: 138-a: bravo-MDT0000: A client on nid 10.21.20.161@tcp was evicted due to a lock blocking callback time out: rc -107
Nov 20 12:08:11 bmds1 kernel: Lustre: 3360:0:(client.c:1868:ptlrpc_expire_one_request()) @@@ Request sent has timed out &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; slow reply: [sent 1384949280/real 1384949280]  req@ffff881e91c8fc00 x1450793259664088/t0(0) o104-&amp;gt;bravo-MDT0000@10.21.20.161@tcp:15/16 lens 296/224 e 0 to 1 dl 1384949291 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
Nov 20 12:08:11 bmds1 kernel: LustreError: 138-a: bravo-MDT0000: A client on nid 10.21.20.161@tcp was evicted due to a lock blocking callback time out: rc -107
Nov 20 12:09:36 bmds1 kernel: Lustre: MGS: haven&lt;span class=&quot;code-quote&quot;&gt;&apos;t heard from client 579e7714-62da-0239-1436-a6aaef6c3c2e (at 10.21.20.161@tcp) in 228 seconds. I think it&apos;&lt;/span&gt;s dead, and I am evicting it. exp ffff88023ee9f400, cur 1384949376 expire 1384949226 last 1384949148
Nov 20 12:09:36 bmds1 kernel: Lustre: Skipped 5 previous similar messages
Nov 20 12:11:30 bmds1 kernel: Lustre: MGS: haven&lt;span class=&quot;code-quote&quot;&gt;&apos;t heard from client bfce87ed-5f06-a61e-66b2-29c25777910c (at 10.21.20.161@tcp) in 228 seconds. I think it&apos;&lt;/span&gt;s dead, and I am evicting it. exp ffff8840099f5400, cur 1384949490 expire 1384949340 last 1384949262
Nov 20 12:12:55 bmds1 rshd[15712]: root@tidworth.dhcp.dneg.com as root: cmd=&lt;span class=&quot;code-quote&quot;&gt;&apos;mount | egrep -w -q tidworth &amp;amp;&amp;amp; echo 1&apos;&lt;/span&gt;
Nov 20 12:12:59 bmds1 kernel: Lustre: MGS: haven&lt;span class=&quot;code-quote&quot;&gt;&apos;t heard from client e329d5cb-9323-ba9b-75eb-08e4bb28f5d6 (at 10.21.20.161@tcp) in 228 seconds. I think it&apos;&lt;/span&gt;s dead, and I am evicting it. exp ffff8821e9671000, cur 1384949579 expire 1384949429 last 1384949351
Nov 20 12:15:06 bmds1 kernel: Lustre: bravo-MDT0000: haven&lt;span class=&quot;code-quote&quot;&gt;&apos;t heard from client 859d5542-b1cd-fc79-f151-e2a1d616d3b6 (at 10.21.20.161@tcp) in 227 seconds. I think it&apos;&lt;/span&gt;s dead, and I am evicting it. exp ffff88222b768000, cur 1384949706 expire 1384949556 last 1384949479
Nov 20 12:15:06 bmds1 kernel: Lustre: Skipped 3 previous similar messages
Nov 20 12:17:03 bmds1 kernel: Lustre: MGS: haven&lt;span class=&quot;code-quote&quot;&gt;&apos;t heard from client c7f8980e-3768-eb52-c409-dd0e8c0262b8 (at 10.21.20.161@tcp) in 229 seconds. I think it&apos;&lt;/span&gt;s dead, and I am evicting it. exp ffff882143efd000, cur 1384949823 expire 1384949673 last 1384949594
Nov 20 12:17:03 bmds1 kernel: Lustre: Skipped 3 previous similar messages
Nov 20 12:17:59 bmds1 kernel: Lustre: 30381:0:(client.c:1868:ptlrpc_expire_one_request()) @@@ Request sent has timed out &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; slow reply: [sent 1384949868/real 1384949868]  req@ffff884009987800 x1450793263165844/t0(0) o104-&amp;gt;bravo-MDT0000@10.21.20.161@tcp:15/16 lens 296/224 e 0 to 1 dl 1384949879 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
Nov 20 12:17:59 bmds1 kernel: LustreError: 138-a: bravo-MDT0000: A client on nid 10.21.20.161@tcp was evicted due to a lock blocking callback time out: rc -107
Nov 20 12:19:17 bmds1 kernel: Lustre: MGS: haven&lt;span class=&quot;code-quote&quot;&gt;&apos;t heard from client 4414062d-c26f-f3be-2991-56b4465da4d2 (at 10.21.20.161@tcp) in 230 seconds. I think it&apos;&lt;/span&gt;s dead, and I am evicting it. exp ffff88222c64d000, cur 1384949957 expire 1384949807 last 1384949727
Nov 20 12:19:17 bmds1 kernel: Lustre: Skipped 3 previous similar messages
Nov 20 12:22:18 bmds1 kernel: Lustre: bravo-MDT0000: haven&lt;span class=&quot;code-quote&quot;&gt;&apos;t heard from client ab18aa85-db83-fe4a-ee80-c5b97ce1b739 (at 10.21.20.161@tcp) in 227 seconds. I think it&apos;&lt;/span&gt;s dead, and I am evicting it. exp ffff8822262fd000, cur 1384950138 expire 1384949988 last 1384949911
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="72567" author="bobijam" created="Mon, 2 Dec 2013 11:04:32 +0000"  >&lt;p&gt;The Lustre client does not initialize op_valid in ll_get_parent(), which mdc_pack_body() will set it if the inode&apos;s fid is valid (please refer to ll_prep_md_od_data() &lt;/p&gt;
{... op_data-&amp;gt;op_fid1 = *ll_inode2fid(i1); }

&lt;p&gt;Set op_valid in ll_get_parent will ask MDS to retrieve its parent&apos;s fid by name, regardless that the inode&apos;s fid is not available.&lt;/p&gt;

&lt;p&gt;If that still does not stop the LBUG, something wrong on MDS which fails to find its parent&apos;s fid could lead the issue.&lt;/p&gt;</comment>
                            <comment id="72665" author="bobijam" created="Tue, 3 Dec 2013 01:07:16 +0000"  >&lt;p&gt;patch updated at &lt;a href=&quot;http://review.whamcloud.com/8459&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8459&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="72893" author="daire" created="Thu, 5 Dec 2013 13:51:14 +0000"  >&lt;p&gt;patch applied and so far so good.&lt;/p&gt;</comment>
                            <comment id="74253" author="bogl" created="Thu, 2 Jan 2014 18:25:33 +0000"  >&lt;p&gt;backport to b2_5&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/8706&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8706&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="74336" author="pjones" created="Sat, 4 Jan 2014 14:58:57 +0000"  >&lt;p&gt;Landed for 2.6&lt;/p&gt;</comment>
                            <comment id="75228" author="ekolb" created="Fri, 17 Jan 2014 22:24:29 +0000"  >&lt;p&gt;What are the options for 2.4.2? We seem to be suffering from this bug but wish to stay on the stable release.&lt;/p&gt;</comment>
                            <comment id="76053" author="ihara" created="Sun, 2 Feb 2014 06:04:51 +0000"  >&lt;p&gt;We hit same problem on 2.4.2, just posted backport patches for b2_4. &lt;a href=&quot;http://review.whamcloud.com/9092&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9092&lt;/a&gt; Please review on this.&lt;/p&gt;</comment>
                            <comment id="78451" author="knweiss" created="Wed, 5 Mar 2014 13:17:29 +0000"  >&lt;p&gt;Shuichi, we&apos;re testing your 2.4.2 backport since 2014-02-21 and so far it&apos;s seems to have fixed this problem for us. The fix should probably be applied to b2_4, too.&lt;/p&gt;</comment>
                            <comment id="86404" author="schamp" created="Thu, 12 Jun 2014 04:43:43 +0000"  >&lt;p&gt;backport to b1_8&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/10691/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10691/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="90273" author="lawrence_wright" created="Tue, 29 Jul 2014 09:41:12 +0000"  >&lt;p&gt;Could you confirm if the backported 2.4 fix ever made it into 2.4.3? We&apos;ve just seen an instance of this on a 2.4.3 based NFS gateway. Thanks!&lt;/p&gt;</comment>
                            <comment id="90274" author="bobijam" created="Tue, 29 Jul 2014 09:56:41 +0000"  >&lt;p&gt;no, it hasn&apos;t been landed in 2.4 yet (neither for 2.4.3).&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzw2n3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>10510</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>