<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:31:20 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-3142] recovery-mds-scale test_failover_mds: dd: writing `/mnt/lustre/d0.dd-client-32vm5.lab.whamcloud.com/dd-file&apos;: Bad file descriptor</title>
                <link>https://jira.whamcloud.com/browse/LU-3142</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While running recovery-mds-scale test_failover_mds, dd operation failed on one of the client nodes as follows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2013-04-08 22:25:26: dd run starting
+ mkdir -p /mnt/lustre/d0.dd-client-32vm5.lab.whamcloud.com
+ /usr/bin/lfs setstripe -c -1 /mnt/lustre/d0.dd-client-32vm5.lab.whamcloud.com
+ cd /mnt/lustre/d0.dd-client-32vm5.lab.whamcloud.com
++ /usr/bin/lfs df /mnt/lustre/d0.dd-client-32vm5.lab.whamcloud.com
+ FREE_SPACE=12963076
+ BLKS=2916692
+ echo &apos;Free disk space is 12963076, 4k blocks to dd is 2916692&apos;
+ load_pid=8739
+ wait 8739
+ dd bs=4k count=2916692 status=noxfer if=/dev/zero of=/mnt/lustre/d0.dd-client-32vm5.lab.whamcloud.com/dd-file
dd: writing `/mnt/lustre/d0.dd-client-32vm5.lab.whamcloud.com/dd-file&apos;: Bad file descriptor
295176+0 records in
295175+0 records out
+ &apos;[&apos; 1 -eq 0 &apos;]&apos;
++ date &apos;+%F %H:%M:%S&apos;
+ echoerr &apos;2013-04-08 22:27:28: dd failed&apos;
+ echo &apos;2013-04-08 22:27:28: dd failed&apos;
2013-04-08 22:27:28: dd failed
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo report: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/68bce4aa-a1bb-11e2-bdac-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/68bce4aa-a1bb-11e2-bdac-52540035b04c&lt;/a&gt;&lt;/p&gt;</description>
                <environment>&lt;br/&gt;
Lustre Branch: master&lt;br/&gt;
Lustre Build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-master/1381/&quot;&gt;http://build.whamcloud.com/job/lustre-master/1381/&lt;/a&gt;&lt;br/&gt;
Distro/Arch: RHEL6.3/x86_64&lt;br/&gt;
Test Group: failover&lt;br/&gt;
FAILURE_MODE=HARD&lt;br/&gt;
</environment>
        <key id="18314">LU-3142</key>
            <summary>recovery-mds-scale test_failover_mds: dd: writing `/mnt/lustre/d0.dd-client-32vm5.lab.whamcloud.com/dd-file&apos;: Bad file descriptor</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="hongchao.zhang">Hongchao Zhang</assignee>
                                    <reporter username="yujian">Jian Yu</reporter>
                        <labels>
                            <label>LB</label>
                    </labels>
                <created>Wed, 10 Apr 2013 15:13:52 +0000</created>
                <updated>Thu, 6 Jun 2013 12:50:13 +0000</updated>
                            <resolved>Sat, 27 Apr 2013 21:04:19 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="56029" author="adilger" created="Wed, 10 Apr 2013 17:41:38 +0000"  >&lt;p&gt;Are we able to pass any MDS failovers, or do they fail 100% of the time?  It appears that this test failed immediately on the first MDS failover, but we don&apos;t have any useful logs from the MDS, so it is difficult to know why the OSTs were evicted.&lt;/p&gt;</comment>
                            <comment id="56043" author="pjones" created="Wed, 10 Apr 2013 18:36:43 +0000"  >&lt;p&gt;Hongchao&lt;/p&gt;

&lt;p&gt;Could you please look into this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="56062" author="yujian" created="Thu, 11 Apr 2013 04:18:34 +0000"  >&lt;p&gt;The recovery-*-scale tests on master branch have been blocked by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2008&quot; title=&quot;After hardware reboot (using pm) the node cannot be accessed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2008&quot;&gt;&lt;del&gt;LU-2008&lt;/del&gt;&lt;/a&gt;. After the issue was fixed 2 days ago, the hard failover tests were started being performed by autotest. I submitted &lt;a href=&quot;http://review.whamcloud.com/6013&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/6013&lt;/a&gt; to reproduce the issue.&lt;/p&gt;</comment>
                            <comment id="56195" author="hongchao.zhang" created="Fri, 12 Apr 2013 10:48:07 +0000"  >&lt;p&gt;the logs in MDS doesn&apos;t contain any valid info about Lustre.&lt;/p&gt;

&lt;p&gt;the error &quot;Bad file descriptor&quot; (-EBADFD) is not a common error, there is only one place in Lustre (in ll_statahead_interpret),&lt;br/&gt;
and in Linux, it&apos;s only in the following modules&lt;/p&gt;

&lt;p&gt;driver/&lt;br/&gt;
    isdn, net, macintosh, ieee1394, atm, media, usb&lt;br/&gt;
fs/&lt;br/&gt;
    jfss2, ncpfs&lt;br/&gt;
net/&lt;br/&gt;
    iucv, atm, 9p, bluetooth&lt;br/&gt;
sound/&lt;br/&gt;
    core, drivers, usb&lt;/p&gt;

&lt;p&gt;then this error could come from driver modules, or trigger at user space.&lt;/p&gt;</comment>
                            <comment id="56216" author="adilger" created="Fri, 12 Apr 2013 18:12:43 +0000"  >&lt;p&gt;Is this actually -EBADF (which is a different error code)?   Are there any messages about that in the console log?  Are you sure that this was build 1381 (commit 49b06fba39e7fec26a0250ed37f04a620e349b5f) being tested?   If it was a later build it might have been caused by commit &lt;a href=&quot;http://review.whamcloud.com/5820&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5820&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="56312" author="yujian" created="Mon, 15 Apr 2013 12:09:12 +0000"  >&lt;blockquote&gt;&lt;p&gt;Is this actually -EBADF (which is a different error code)? Are there any messages about that in the console log? Are you sure that this was build 1381 (commit 49b06fba39e7fec26a0250ed37f04a620e349b5f) being tested? If it was a later build it might have been caused by commit &lt;a href=&quot;http://review.whamcloud.com/5820&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5820&lt;/a&gt;.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;I did not find -EBADF(-9) or -EBADFD(-77) in the console logs. Due to TT-1107, the console logs were not gathered completely in the Maloo report. Please refer to the attached tarball. I&apos;m sure this was build &lt;a href=&quot;http://build.whamcloud.com/job/lustre-master/1381/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-master/1381/&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The debug patch in &lt;a href=&quot;http://review.whamcloud.com/#change,6013&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,6013&lt;/a&gt; has been waiting for test resource for 3 days. I&apos;ve to start manual test run to reproduce this issue.&lt;/p&gt;</comment>
                            <comment id="56325" author="yujian" created="Mon, 15 Apr 2013 15:29:05 +0000"  >&lt;blockquote&gt;&lt;p&gt;it could be related to &lt;a href=&quot;http://review.whamcloud.com/5820&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5820&lt;/a&gt;&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Hi Hongchao, build &lt;a href=&quot;http://build.whamcloud.com/job/lustre-master/1381/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-master/1381/&lt;/a&gt; does not contain the above patch.&lt;/p&gt;</comment>
                            <comment id="56329" author="yujian" created="Mon, 15 Apr 2013 16:26:52 +0000"  >&lt;p&gt;While I trying to reproduce the issue in this ticket on Rosso cluster, I hit &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3175&quot; title=&quot;recovery-mds-scale test_failover_mds: unlink ./clients/client1/~dmtmp/PWRPNT/PPTC112.TMP failed (Read-only file system)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3175&quot;&gt;&lt;del&gt;LU-3175&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="56536" author="hongchao.zhang" created="Thu, 18 Apr 2013 11:25:17 +0000"  >&lt;p&gt;in the logs of &lt;a href=&quot;http://review.whamcloud.com/#change,6013&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,6013&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;the stripe data of the file used by &quot;dd&quot; is missed after failover, then it set the layout type of the file to Empty,&lt;br/&gt;
which causes this error (-9, Bad file descriptor)&lt;/p&gt;

&lt;p&gt;...&lt;br/&gt;
00020000:00000001:0.0:1366159710.913287:0:8913:0:(lov_object.c:359:lov_fini_raid0()) Process entered&lt;br/&gt;
00020000:00000010:0.0:1366159710.913288:0:8913:0:(lov_object.c:362:lov_fini_raid0()) kfreed &apos;r0-&amp;gt;lo_sub&apos;: 56 at ffff88007d4e6940.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913290:0:8913:0:(lov_ea.c:131:lsm_free_plain()) slab-freed &apos;lsm-&amp;gt;lsm_oinfo&lt;span class=&quot;error&quot;&gt;&amp;#91;i&amp;#93;&lt;/span&gt;&apos;: 112 at ffff8800373d1840.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913292:0:8913:0:(lov_ea.c:131:lsm_free_plain()) slab-freed &apos;lsm-&amp;gt;lsm_oinfo&lt;span class=&quot;error&quot;&gt;&amp;#91;i&amp;#93;&lt;/span&gt;&apos;: 112 at ffff8800373d17c0.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913293:0:8913:0:(lov_ea.c:131:lsm_free_plain()) slab-freed &apos;lsm-&amp;gt;lsm_oinfo&lt;span class=&quot;error&quot;&gt;&amp;#91;i&amp;#93;&lt;/span&gt;&apos;: 112 at ffff8800373d1740.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913293:0:8913:0:(lov_ea.c:131:lsm_free_plain()) slab-freed &apos;lsm-&amp;gt;lsm_oinfo&lt;span class=&quot;error&quot;&gt;&amp;#91;i&amp;#93;&lt;/span&gt;&apos;: 112 at ffff8800373d16c0.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913294:0:8913:0:(lov_ea.c:131:lsm_free_plain()) slab-freed &apos;lsm-&amp;gt;lsm_oinfo&lt;span class=&quot;error&quot;&gt;&amp;#91;i&amp;#93;&lt;/span&gt;&apos;: 112 at ffff8800373d1640.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913294:0:8913:0:(lov_ea.c:131:lsm_free_plain()) slab-freed &apos;lsm-&amp;gt;lsm_oinfo&lt;span class=&quot;error&quot;&gt;&amp;#91;i&amp;#93;&lt;/span&gt;&apos;: 112 at ffff8800373d15c0.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913295:0:8913:0:(lov_ea.c:131:lsm_free_plain()) slab-freed &apos;lsm-&amp;gt;lsm_oinfo&lt;span class=&quot;error&quot;&gt;&amp;#91;i&amp;#93;&lt;/span&gt;&apos;: 112 at ffff8800373d1540.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913296:0:8913:0:(lov_ea.c:133:lsm_free_plain()) kfreed &apos;lsm&apos;: 128 at ffff88007b7cbc40.&lt;br/&gt;
00020000:00000001:0.0:1366159710.913297:0:8913:0:(lov_object.c:369:lov_fini_raid0()) Process leaving&lt;br/&gt;
00000020:00001000:0.0:1366159710.913297:0:8913:0:(cl_object.c:896:cl_env_put()) 1@ffff88007a373138&lt;br/&gt;
00000020:00000010:0.0:1366159710.913298:0:8913:0:(lu_object.c:421:lu_global_key_fini()) kfreed &apos;info&apos;: 512 at ffff8800175e4200.&lt;br/&gt;
00000020:00000010:0.0:1366159710.913299:0:8913:0:(cl_object.c:1090:cl0_key_fini()) kfreed &apos;info&apos;: 600 at ffff8800435e3000.&lt;br/&gt;
00000008:00000010:0.0:1366159710.913300:0:8913:0:(osc_dev.c:135:osc_key_fini()) slab-freed &apos;(info)&apos;: 984 at ffff88002ad267f0.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913301:0:8913:0:(lov_dev.c:159:lov_key_fini()) slab-freed &apos;(info)&apos;: 288 at ffff88007db76be8.&lt;br/&gt;
00000080:00000010:0.0:1366159710.913302:0:8913:0:(lcommon_cl.c:146:ccc_key_fini()) slab-freed &apos;(info)&apos;: 352 at ffff88007db048e0.&lt;br/&gt;
00000080:00000010:0.0:1366159710.913303:0:8913:0:(vvp_dev.c:96:vvp_key_fini()) slab-freed &apos;(info)&apos;: 488 at ffff88002ad28420.&lt;br/&gt;
00000020:00000010:0.0:1366159710.913304:0:8913:0:(lu_object.c:1531:keys_fini()) kfreed &apos;ctx-&amp;gt;lc_value&apos;: 320 at ffff8800175e4400.&lt;br/&gt;
00000008:00000010:0.0:1366159710.913305:0:8913:0:(osc_dev.c:159:osc_session_fini()) slab-freed &apos;(info)&apos;: 424 at ffff88007db05590.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913305:0:8913:0:(lov_dev.c:183:lov_session_key_fini()) slab-freed &apos;(info)&apos;: 400 at ffff88007db74508.&lt;br/&gt;
00000080:00000010:0.0:1366159710.913306:0:8913:0:(lcommon_cl.c:164:ccc_session_key_fini()) slab-freed &apos;(session)&apos;: 184 at ffff88007a36b1f8.&lt;br/&gt;
00000080:00000010:0.0:1366159710.913307:0:8913:0:(vvp_dev.c:114:vvp_session_key_fini()) slab-freed &apos;(session)&apos;: 104 at ffff88007a36a7b0.&lt;br/&gt;
00000020:00000010:0.0:1366159710.913308:0:8913:0:(lu_object.c:1531:keys_fini()) kfreed &apos;ctx-&amp;gt;lc_value&apos;: 320 at ffff880030d75800.&lt;br/&gt;
00000020:00000010:0.0:1366159710.913309:0:8913:0:(cl_object.c:787:cl_env_fini()) slab-freed &apos;(cle)&apos;: 168 at ffff88007a373138.&lt;br/&gt;
00020000:00000001:0.0:1366159710.913310:0:8913:0:(lov_object.c:653:lov_layout_change()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00020000:00000001:0.0:1366159710.913311:0:8913:0:(lov_object.c:731:lov_conf_set()) Process leaving&lt;br/&gt;
00020000:00000001:0.0:1366159710.913312:0:8913:0:(lov_object.c:735:lov_conf_set()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000020:00000001:0.0:1366159710.913313:0:8913:0:(cl_object.c:329:cl_conf_set()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000020:00001000:0.0:1366159710.913314:0:8913:0:(cl_object.c:896:cl_env_put()) 1@ffff88007a373e58&lt;br/&gt;
00000020:00000010:0.0:1366159710.913315:0:8913:0:(lu_object.c:421:lu_global_key_fini()) kfreed &apos;info&apos;: 512 at ffff88001fc15c00.&lt;br/&gt;
00000020:00000010:0.0:1366159710.913316:0:8913:0:(cl_object.c:1090:cl0_key_fini()) kfreed &apos;info&apos;: 600 at ffff880049dc7c00.&lt;br/&gt;
00000008:00000010:0.0:1366159710.913317:0:8913:0:(osc_dev.c:135:osc_key_fini()) slab-freed &apos;(info)&apos;: 984 at ffff88007b751418.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913318:0:8913:0:(lov_dev.c:159:lov_key_fini()) slab-freed &apos;(info)&apos;: 288 at ffff88007a370728.&lt;br/&gt;
00000080:00000010:0.0:1366159710.913319:0:8913:0:(lcommon_cl.c:146:ccc_key_fini()) slab-freed &apos;(info)&apos;: 352 at ffff88007a36f5e0.&lt;br/&gt;
00000080:00000010:0.0:1366159710.913320:0:8913:0:(vvp_dev.c:96:vvp_key_fini()) slab-freed &apos;(info)&apos;: 488 at ffff88007a36e238.&lt;br/&gt;
00000020:00000010:0.0:1366159710.913320:0:8913:0:(lu_object.c:1531:keys_fini()) kfreed &apos;ctx-&amp;gt;lc_value&apos;: 320 at ffff88001765f400.&lt;br/&gt;
00000008:00000010:0.0:1366159710.913321:0:8913:0:(osc_dev.c:159:osc_session_fini()) slab-freed &apos;(info)&apos;: 424 at ffff88002ac25e18.&lt;br/&gt;
00020000:00000010:0.0:1366159710.913322:0:8913:0:(lov_dev.c:183:lov_session_key_fini()) slab-freed &apos;(info)&apos;: 400 at ffff88007db74828.&lt;br/&gt;
00000080:00000010:0.0:1366159710.913323:0:8913:0:(lcommon_cl.c:164:ccc_session_key_fini()) slab-freed &apos;(session)&apos;: 184 at ffff88007a36b870.&lt;br/&gt;
00000080:00000010:0.0:1366159710.913323:0:8913:0:(vvp_dev.c:114:vvp_session_key_fini()) slab-freed &apos;(session)&apos;: 104 at ffff88007a36a0c8.&lt;br/&gt;
00000020:00000010:0.0:1366159710.913324:0:8913:0:(lu_object.c:1531:keys_fini()) kfreed &apos;ctx-&amp;gt;lc_value&apos;: 320 at ffff880017664000.&lt;br/&gt;
00000020:00000010:0.0:1366159710.913325:0:8913:0:(cl_object.c:787:cl_env_fini()) slab-freed &apos;(cle)&apos;: 168 at ffff88007a373e58.&lt;br/&gt;
00000080:00000001:0.0:1366159710.913326:0:8913:0:(file.c:3236:ll_layout_conf()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00010000:00000001:0.0:1366159710.913327:0:8913:0:(ldlm_lock.c:211:ldlm_lock_put()) Process entered&lt;br/&gt;
00010000:00000001:0.0:1366159710.913328:0:8913:0:(ldlm_lock.c:244:ldlm_lock_put()) Process leaving&lt;br/&gt;
00010000:00000001:0.0:1366159710.913329:0:8913:0:(ldlm_lock.c:609:__ldlm_handle2lock()) Process entered&lt;br/&gt;
00000020:00000001:0.0:1366159710.913329:0:8913:0:(lustre_handles.c:172:class_handle2object()) Process entered&lt;br/&gt;
00000020:00000001:0.0:1366159710.913330:0:8913:0:(lustre_handles.c:195:class_handle2object()) Process leaving (rc=18446612134411306496 : -131939298245120 : ffff88007cff0600)&lt;br/&gt;
00010000:00000001:0.0:1366159710.913331:0:8913:0:(ldlm_lock.c:621:__ldlm_handle2lock()) Process leaving (rc=18446612134411306496 : -131939298245120 : ffff88007cff0600)&lt;br/&gt;
00010000:00000001:0.0:1366159710.913332:0:8913:0:(ldlm_lock.c:876:ldlm_lock_decref_internal()) Process entered&lt;br/&gt;
00010000:00010000:0.0:1366159710.913333:0:8913:0:(ldlm_lock.c:849:ldlm_lock_decref_internal_nolock()) ### ldlm_lock_decref(CR) ns: lustre-MDT0000-mdc-ffff880037d22800 lock: ffff88007cff0600/0x6e4fd575de0e4a90 lrc: 3/1,0 mode: CR/CR res: 8589935616/3 bits 0x8 rrc: 1 type: IBT flags: 0x20000000000 nid: local remote: 0xb524751f9485880b expref: -99 pid: 8913 timeout: 0 lvb_type: 3&lt;br/&gt;
00010000:00000001:0.0:1366159710.913335:0:8913:0:(ldlm_lock.c:211:ldlm_lock_put()) Process entered&lt;br/&gt;
00010000:00000001:0.0:1366159710.913335:0:8913:0:(ldlm_lock.c:244:ldlm_lock_put()) Process leaving&lt;br/&gt;
00010000:00010000:0.0:1366159710.913337:0:8913:0:(ldlm_lock.c:931:ldlm_lock_decref_internal()) ### add lock into lru list ns: lustre-MDT0000-mdc-ffff880037d22800 lock: ffff88007cff0600/0x6e4fd575de0e4a90 lrc: 2/0,0 mode: CR/CR res: 8589935616/3 bits 0x8 rrc: 1 type: IBT flags: 0x20000000000 nid: local remote: 0xb524751f9485880b expref: -99 pid: 8913 timeout: 0 lvb_type: 3&lt;br/&gt;
00010000:00000001:0.0:1366159710.913341:0:8913:0:(ldlm_lock.c:312:ldlm_lock_add_to_lru()) Process entered&lt;br/&gt;
00010000:00000001:0.0:1366159710.913342:0:8913:0:(ldlm_lock.c:316:ldlm_lock_add_to_lru()) Process leaving&lt;br/&gt;
00010000:00000001:0.0:1366159710.913342:0:8913:0:(ldlm_lock.c:952:ldlm_lock_decref_internal()) Process leaving&lt;br/&gt;
00010000:00000001:0.0:1366159710.913343:0:8913:0:(ldlm_lock.c:211:ldlm_lock_put()) Process entered&lt;br/&gt;
00010000:00000001:0.0:1366159710.913343:0:8913:0:(ldlm_lock.c:244:ldlm_lock_put()) Process leaving&lt;br/&gt;
00000080:00000001:0.0:1366159710.913344:0:8913:0:(file.c:3339:ll_layout_lock_set()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000080:00000001:0.0:1366159710.913345:0:8913:0:(file.c:3445:ll_layout_refresh()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000080:00000001:0.0:1366159710.913346:0:8913:0:(vvp_io.c:1220:vvp_io_init()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00020000:00000001:0.0:1366159710.913347:0:8913:0:(lov_io.c:939:lov_io_init_empty()) Process entered&lt;br/&gt;
00020000:00000001:0.0:1366159710.913347:0:8913:0:(lov_io.c:968:lov_io_init_empty()) Process leaving (rc=1 : 1 : 1)&lt;br/&gt;
00000020:00000001:0.0:1366159710.913348:0:8913:0:(cl_io.c:178:cl_io_init0()) Process leaving (rc=1 : 1 : 1)&lt;br/&gt;
00000020:00000001:0.0:1366159710.913349:0:8913:0:(cl_io.c:239:cl_io_rw_init()) Process leaving (rc=1 : 1 : 1)&lt;br/&gt;
00000080:00000001:0.0:1366159710.913350:0:8913:0:(file.c:920:ll_file_io_generic()) Process leaving via out (rc=18446744073709551607 : -9 : 0xfffffffffffffff7)&lt;br/&gt;
...&lt;/p&gt;


&lt;p&gt;but the log of MDS is unavailable (it&apos;s at another node &quot;wtm-15vm7&quot;), and Yujian will help to collect it manually and will check it once the log is ready.&lt;/p&gt;</comment>
                            <comment id="56603" author="yujian" created="Fri, 19 Apr 2013 00:57:20 +0000"  >&lt;p&gt;Lustre Branch: master&lt;br/&gt;
Lustre Build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-master/1406/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-master/1406/&lt;/a&gt;&lt;br/&gt;
Distro/Arch: RHEL6.3/x86_64&lt;br/&gt;
Test Group: failover&lt;br/&gt;
FAILURE_MODE=HARD&lt;/p&gt;

&lt;p&gt;Please look into this report: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/3d4c2120-a849-11e2-ba78-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/3d4c2120-a849-11e2-ba78-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="56616" author="hongchao.zhang" created="Fri, 19 Apr 2013 11:04:28 +0000"  >&lt;p&gt;this bug should be caused by the wrong size of &quot;MD&quot; in MDT,&lt;/p&gt;

&lt;p&gt;00000004:00000001:21.0:1366291257.871077:0:4881:0:(mdd_object.c:268:mdd_xattr_get()) Process entered&lt;br/&gt;
00000004:00000001:21.0:1366291257.871080:0:4881:0:(lod_object.c:373:lod_xattr_get()) Process entered&lt;br/&gt;
00000004:00000001:21.0:1366291257.871085:0:4881:0:(lod_object.c:377:lod_xattr_get()) Process leaving (rc=176 : 176 : b0)&lt;br/&gt;
00000004:00000001:21.0:1366291257.871087:0:4881:0:(mdd_object.c:281:mdd_xattr_get()) Process leaving (rc=176 : 176 : b0)&lt;br/&gt;
00000004:00020000:21.0:1366291257.871089:0:4881:0:(mdt_lvb.c:158:mdt_lvbo_fill()) lustre-MDT0000: expected 176 actual 128.&lt;br/&gt;
00000004:00000001:21.0:1366291257.880256:0:4881:0:(mdt_lvb.c:159:mdt_lvbo_fill()) Process leaving via out (rc=18446744073709551582 : -34 : 0xffffffffffffffde)&lt;/p&gt;

&lt;p&gt;1, the default value of mdt_device-&amp;gt;mdt_max_mdsize is 128bytes,&lt;br/&gt;
   #define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)&lt;/p&gt;

&lt;p&gt;2, before failover, the MD size is changed to 176bytes = (sizeof(struct lov_mds_md) + 6 * sizeof(struct lov_ost_data)&lt;br/&gt;
   mdt_device-&amp;gt;mdt_max_mdsize is updated accordingly (see mdt_attr_get_lov, will update &quot;mdt_max_mdsize&quot; in &quot;getattr&quot; request)&lt;/p&gt;

&lt;p&gt;3, after failover, the new MDT doesn&apos;t know the actual mdt_max_mdsize, and still use the default value, then client calls ll_layout_refresh&lt;br/&gt;
   to get the MD and the MDT will failed with &lt;del&gt;ERANGE for there is still no &quot;getattr&quot; request to update mdt_device&lt;/del&gt;&amp;gt;mdt_max_mdsize!&lt;/p&gt;

&lt;p&gt;the patch is tracked at &lt;a href=&quot;http://review.whamcloud.com/#change,6102&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,6102&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="56670" author="yujian" created="Mon, 22 Apr 2013 07:39:10 +0000"  >&lt;p&gt;A patch for master branch to gather the logs on passive server nodes in failure configuration: &lt;a href=&quot;http://review.whamcloud.com/6112&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/6112&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="57152" author="adilger" created="Fri, 26 Apr 2013 18:16:04 +0000"  >&lt;p&gt;Not closing this bug until the redundant getxattr call has been cleaned up per inspection comments.&lt;/p&gt;</comment>
                            <comment id="57195" author="pjones" created="Sat, 27 Apr 2013 21:04:19 +0000"  >&lt;p&gt;Efficiency of solution will be improved under &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2818&quot; title=&quot;Failure on test suite parallel-scale-nfsv4 test_compilebench: (mdt_lvb.c:126:mdt_lvbo_fill()) ASSERTION( rc == 0 ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2818&quot;&gt;&lt;del&gt;LU-2818&lt;/del&gt;&lt;/a&gt;. Current solution sufficient for 2.4&lt;/p&gt;</comment>
                            <comment id="60080" author="simmonsja" created="Thu, 6 Jun 2013 11:38:04 +0000"  >&lt;p&gt;I don&apos;t know how it got passed you build system but patch &lt;a href=&quot;http://review.whamcloud.com/#change,6102&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,6102&lt;/a&gt; is missing the&lt;br/&gt;
function fid_build_from_res_name.&lt;/p&gt;

&lt;p&gt;lustre-2.4.0/lustre/mdt/mdt_handler.c: In function &#8216;mdt_intent_layout&#8217;:&lt;br/&gt;
lustre-2.4.0/lustre/mdt/mdt_handler.c:3754: error: implicit declaration of function &#8216;fid_build_from_res_name&#8217;&lt;/p&gt;</comment>
                            <comment id="60082" author="simmonsja" created="Thu, 6 Jun 2013 11:42:59 +0000"  >&lt;p&gt;I found it. Patch &lt;a href=&quot;http://review.whamcloud.com/#change,4501&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4501&lt;/a&gt; removed the fid_build_from_res_name function. Creating patch and testing... &lt;/p&gt;</comment>
                            <comment id="60083" author="simmonsja" created="Thu, 6 Jun 2013 12:27:23 +0000"  >&lt;p&gt;Created a patch to fix this issue.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#change,6566&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,6566&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="60084" author="pjones" created="Thu, 6 Jun 2013 12:50:13 +0000"  >&lt;p&gt;James please could you open a new ticket to track this bug? It sounds like what you are describing is a regression introduced into this work (which was included in 2.4.0) by the work from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2193&quot; title=&quot;lvbo_init failed for resource XXX: rc -2, after recovery&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2193&quot;&gt;&lt;del&gt;LU-2193&lt;/del&gt;&lt;/a&gt; (which happened since 2.4.0) so it would be easier to create a new ticket and link it to the other two related tickets. It can get really confusing trying to work out the situation for a given bug if there are commits to it spanning release boundaries.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="12512" name="recovery-mds-scale.test_failover_mds.console.tar.bz2" size="12991" author="yujian" created="Mon, 15 Apr 2013 12:09:12 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvnjj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7628</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>