<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:51:20 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5420] Failure on test suite sanity test_17m: mount MDS failed, Input/output error</title>
                <link>https://jira.whamcloud.com/browse/LU-5420</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for sarah &amp;lt;sarah@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/16302020-14ed-11e4-bb6a-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/16302020-14ed-11e4-bb6a-5254006e85c2&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_17m failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Hit this bug in many tests,  the env is configured as 1 MDS with 2 MDTs.  Didn&apos;t hit this error when the configuration is 2 MDSs with 2 MDTs&lt;br/&gt;
client console:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;CMD: onyx-46vm7 mkdir -p /mnt/mds1
CMD: onyx-46vm7 test -b /dev/lvm-Role_MDS/P1
Starting mds1:   /dev/lvm-Role_MDS/P1 /mnt/mds1
CMD: onyx-46vm7 mkdir -p /mnt/mds1; mount -t lustre   		                   /dev/lvm-Role_MDS/P1 /mnt/mds1
onyx-46vm7: mount.lustre: mount /dev/mapper/lvm--Role_MDS-P1 at /mnt/mds1 failed: Input/output error
onyx-46vm7: Is the MGS running?
Start of /dev/lvm-Role_MDS/P1 on mds1 failed 5
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>client and server: lustre-b2_6-rc2 RHEL6 ldiskfs DNE mode</environment>
        <key id="25764">LU-5420</key>
            <summary>Failure on test suite sanity test_17m: mount MDS failed, Input/output error</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="di.wang">Di Wang</assignee>
                                    <reporter username="sarah">Sarah Liu</reporter>
                        <labels>
                            <label>HB</label>
                            <label>dne</label>
                            <label>patch</label>
                    </labels>
                <created>Sat, 26 Jul 2014 19:08:35 +0000</created>
                <updated>Tue, 19 Mar 2019 15:15:57 +0000</updated>
                            <resolved>Mon, 11 May 2015 17:36:00 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                    <version>Lustre 2.7.0</version>
                                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>16</watches>
                                                                            <comments>
                            <comment id="90121" author="sarah" created="Sat, 26 Jul 2014 19:09:17 +0000"  >&lt;p&gt;mds console&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;11:01:42:Lustre: DEBUG MARKER: mkdir -p /mnt/mds1; mount -t lustre   		                   /dev/lvm-Role_MDS/P1 /mnt/mds1
11:01:42:LDISKFS-fs (dm-0): mounted filesystem with ordered data mode. quota=on. Opts: 
11:01:42:LustreError: 166-1: MGC10.2.4.243@tcp: Connection to MGS (at 0@lo) was lost; in progress operations using this service will fail
11:01:43:Lustre: Evicted from MGS (at MGC10.2.4.243@tcp_0) after server handle changed from 0x5efc70d9d01b7154 to 0x5efc70d9d01e1f23
11:01:43:LustreError: 18197:0:(obd_mount_server.c:1165:server_register_target()) lustre-MDT0000: error registering with the MGS: rc = -108 (not fatal)
11:01:45:LustreError: 15c-8: MGC10.2.4.243@tcp: The configuration from log &apos;lustre-MDT0000&apos; failed (-5). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.
11:01:45:Lustre: MGC10.2.4.243@tcp: Connection restored to MGS (at 0@lo)
11:01:46:LustreError: 18197:0:(obd_mount_server.c:1297:server_start_targets()) failed to start server lustre-MDT0000: -5
11:01:46:LustreError: 18197:0:(obd_mount_server.c:1769:server_fill_super()) Unable to start targets: -5
11:01:46:LustreError: 18197:0:(obd_mount_server.c:1496:server_put_super()) no obd lustre-MDT0000
11:01:47:LustreError: 18197:0:(obd_mount.c:1342:lustre_fill_super()) Unable to mount  (-5)
11:01:47:Lustre: DEBUG MARKER: lctl set_param -n mdt.lustre*.enable_remote_dir=1
11:01:47:LustreError: 137-5: lustre-MDT0000_UUID: not available for connect from 10.2.4.244@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
11:01:47:LustreError: Skipped 11 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="90123" author="di.wang" created="Sat, 26 Jul 2014 21:58:01 +0000"  >&lt;p&gt;Sigh, this is brought in by &lt;a href=&quot;http://review.whamcloud.com/#/c/9967/6&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9967/6&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4913&quot; title=&quot;mgc import reconnect race&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4913&quot;&gt;&lt;del&gt;LU-4913&lt;/del&gt;&lt;/a&gt; mgc: mgc import reconnect race&lt;/p&gt;

&lt;p&gt;mgc import can be reconnected by pinger or&lt;br/&gt;
ptlrpc_reconnect_import().&lt;br/&gt;
ptlrpc_invalidate_import() isn&apos;t protected against&lt;br/&gt;
alteration of imp_invalid state. Import can be&lt;br/&gt;
reconnected by pinger which makes imp_invalid&lt;br/&gt;
equal to false. Thus LASSERT(imp-&amp;gt;imp_invalid) fails&lt;br/&gt;
in ptlrpc_invalidate_import().&lt;/p&gt;

&lt;p&gt;It is safe to call ptlrpc_invalidate_import() when&lt;br/&gt;
import is deactivated, but ptlrpc_reconnect_import() doesn&apos;t&lt;br/&gt;
deactivate it.&lt;br/&gt;
Let&apos;s use only pinger when available to reconnect import&lt;/p&gt;


&lt;p&gt;Hmm, in ptlrpc_reconnect_import, the patch does not force the the import to reconnect to the server, instead it only check the import status, which seems wrong. Given that the import status might change later soon. I think the intention here to make the import is refreshed and connected after this call. &lt;/p&gt;

&lt;p&gt;Though I am not so sure what the patch are trying to resolve here, since comment is a bit confused to me. I think there two options to fix the problem&lt;/p&gt;

&lt;p&gt;1. revert the patch 9967, or just revert the change in ptlrpc_reconnect_import to force reconnect anyway.&lt;br/&gt;
2. mgc should retry to enqueue and get log when it meet invalid import (by checking the return value -ESHUTDOWN, see ptlrpc_import_delay_req).&lt;/p&gt;

&lt;p&gt;I will cook these two patches to see which one is better. &lt;/p&gt;</comment>
                            <comment id="90124" author="di.wang" created="Sat, 26 Jul 2014 22:10:17 +0000"  >&lt;p&gt;Btw: We hit this until now because our current maloo DNE configuration is a bit different then this final FULL release test. Here we are using 2 MDTs in MDS, where in maloo DNE test, we also use 2 MDSes, but 1 MDS only 1 MDTs, the other MDS has 3 MDTs. &lt;/p&gt;

&lt;p&gt;This failure will only be hit when there are multiple MDTs on the first MDS. &lt;/p&gt;</comment>
                            <comment id="90125" author="di.wang" created="Sat, 26 Jul 2014 22:43:43 +0000"  >&lt;p&gt;Option 1:  &lt;a href=&quot;http://review.whamcloud.com/11241&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11241&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Option 2:  &lt;a href=&quot;http://review.whamcloud.com/11240&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11240&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Personally I like option 1, but I am not sure whether reverting the patch will bring &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4913&quot; title=&quot;mgc import reconnect race&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4913&quot;&gt;&lt;del&gt;LU-4913&lt;/del&gt;&lt;/a&gt; back, the patch description is a bit confused to me.&lt;/p&gt;</comment>
                            <comment id="90253" author="adilger" created="Mon, 28 Jul 2014 23:19:12 +0000"  >&lt;p&gt;The option #2 patch is testing well on my local system (single-node 2x MDT, 3x OST, client) which was having solid test failures in sanity.sh test_17m and test_17o (which I&apos;d incorrectly attributed to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1538&quot; title=&quot;cleanup test scripts&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1538&quot;&gt;&lt;del&gt;LU-1538&lt;/del&gt;&lt;/a&gt; patch &lt;a href=&quot;http://review.whamcloud.com/10481&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10481&lt;/a&gt; that was reverted).&lt;/p&gt;

&lt;p&gt;I&apos;ve pushed an updated version of the 11240 patch at &lt;a href=&quot;http://review.whamcloud.com/11258&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11258&lt;/a&gt; with improved comments and removing some noise from the console.  Since this might be a blocker I didn&apos;t refresh the original 11240 patch so that it could continue testing, but I&apos;d prefer that the 11258 version land if it is ready.&lt;/p&gt;</comment>
                            <comment id="90473" author="adilger" created="Wed, 30 Jul 2014 19:07:35 +0000"  >&lt;p&gt;It seems that this patch is repeatedly failing insanity, even when it is running on b2_6.  The failures are marked as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5077&quot; title=&quot;insanity test_1: out of memory on MDT in crypto_create_tfm()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5077&quot;&gt;&lt;del&gt;LU-5077&lt;/del&gt;&lt;/a&gt;, but I don&apos;t think that is the real reason.  I suspect there is some other problem with this patch that needs to be investigated.&lt;/p&gt;</comment>
                            <comment id="90477" author="adilger" created="Wed, 30 Jul 2014 19:30:56 +0000"  >&lt;p&gt;I verified that virtually all of the test failures marked &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5077&quot; title=&quot;insanity test_1: out of memory on MDT in crypto_create_tfm()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5077&quot;&gt;&lt;del&gt;LU-5077&lt;/del&gt;&lt;/a&gt; are actually from the three versions of the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5420&quot; title=&quot;Failure on test suite sanity test_17m: mount MDS failed, Input/output error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5420&quot;&gt;&lt;del&gt;LU-5420&lt;/del&gt;&lt;/a&gt; patches, which fail &quot;insanity&quot; and &quot;conf-sanity&quot; repeatedly.  Due to the presence of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5425&quot; title=&quot;Test failure on test suite conf-sanity, subtest test_32a (mount OST failed)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5425&quot;&gt;&lt;del&gt;LU-5425&lt;/del&gt;&lt;/a&gt;, I&apos;m not 100% positive that those are caused by this patch, but definitely the insanity failures.&lt;/p&gt;</comment>
                            <comment id="90478" author="di.wang" created="Wed, 30 Jul 2014 19:35:37 +0000"  >&lt;p&gt;Hmm, I think insanity failures should be related with the fix from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5420&quot; title=&quot;Failure on test suite sanity test_17m: mount MDS failed, Input/output error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5420&quot;&gt;&lt;del&gt;LU-5420&lt;/del&gt;&lt;/a&gt;. I am looking at it now.  &lt;/p&gt;</comment>
                            <comment id="90482" author="di.wang" created="Wed, 30 Jul 2014 19:56:11 +0000"  >&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;7/30/14, 12:54:36 PM&amp;#93;&lt;/span&gt; wangdi: the failure of insanity is because of with the fix of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5425&quot; title=&quot;Test failure on test suite conf-sanity, subtest test_32a (mount OST failed)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5425&quot;&gt;&lt;del&gt;LU-5425&lt;/del&gt;&lt;/a&gt;, MDT will insist MGS must be started, i.e. MDS setup process will wait there until MGS is setup. But in insanity test_1,  it start mdt2 first, then mdt1/mgs, that is why the test will fail&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;7/30/14, 12:55:32 PM&amp;#93;&lt;/span&gt; wangdi: so can we just fix the test case here, because it seems to me MGS must be setup first&lt;/p&gt;</comment>
                            <comment id="90563" author="di.wang" created="Thu, 31 Jul 2014 17:31:56 +0000"  >&lt;p&gt;Sigh, most of insanity test starts MDT or OST before MGS, that is why it cause so many insanity failure with this patch. So if &quot;starting mgs before other targets&quot; is a requirement then we need fix insanity.&lt;/p&gt;</comment>
                            <comment id="91343" author="jhammond" created="Mon, 11 Aug 2014 22:13:19 +0000"  >&lt;p&gt;Test specific issues aside, we need to fix this as putting 2 MDTs from a FS on a single node will be a likely failover configuration.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;t:lustre-release# export LUSTRE=$HOME/lustre-release/lustre
t:lustre-release# export MDSCOUNT=2
t:lustre-release# llmount.sh
...
Starting mds1:   -o loop /tmp/lustre-mdt1 /mnt/mds1
Started lustre-MDT0000
Starting mds2:   -o loop /tmp/lustre-mdt2 /mnt/mds2
Started lustre-MDT0001
Starting ost1:   -o loop /tmp/lustre-ost1 /mnt/ost1
Started lustre-OST0000
Starting ost2:   -o loop /tmp/lustre-ost2 /mnt/ost2
Started lustre-OST0001
Starting client: t:  -o user_xattr,flock t@tcp:/lustre /mnt/lustre
Using TIMEOUT=20
seting jobstats to procname_uid
Setting lustre.sys.jobid_var from disable to procname_uid
Waiting 90 secs for update
Updated after 3s: wanted &apos;procname_uid&apos; got &apos;procname_uid&apos;
disable quota as required
t:lustre-release# umount /mnt/mds1
t:lustre-release# mount /tmp/lustre-mdt1 /mnt/mds1 -o loop -t lustre
mount.lustre: mount /dev/loop0 at /mnt/mds1 failed: Input/output error
Is the MGS running?
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="95996" author="adilger" created="Thu, 9 Oct 2014 06:18:39 +0000"  >&lt;p&gt;Without this patch I&apos;m also not able to test past sanity.sh test_17m and test_17n without a shared MGS+MDS failing to mount due to -EIO and causing testing to hang until I remount the MDS.  I&apos;m able to mount it manually after 2 or 3 tries, so there must be some kind of startup race between the MDS and the MGS.  Once I applied this patch I made it through all of sanity.sh and sanityn.sh with multiple MDS remounts without problems until I hit a memory allocation deadlock running dbench that looks unrelated.&lt;/p&gt;</comment>
                            <comment id="96353" author="adilger" created="Tue, 14 Oct 2014 21:14:21 +0000"  >&lt;p&gt;The patch is still failing with a hang at unmount time (this failed in four separate conf-sanity runs in different subtests):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;01:52:25:INFO: task umount:26263 blocked for more than 120 seconds.
01:52:25:      Tainted: G        W  ---------------    2.6.32-431.23.3.el6_lustre.g9f5284f.x86_64 #1
01:52:26:umount        D 0000000000000000     0 26263  26262 0x00000080
01:52:27:Call Trace:
01:52:27: [&amp;lt;ffffffff8152b6e5&amp;gt;] rwsem_down_failed_common+0x95/0x1d0
01:52:27: [&amp;lt;ffffffff8152b843&amp;gt;] rwsem_down_write_failed+0x23/0x30
01:52:28: [&amp;lt;ffffffff8128f7f3&amp;gt;] call_rwsem_down_write_failed+0x13/0x20
01:52:28: [&amp;lt;ffffffffa0b13cd1&amp;gt;] client_disconnect_export+0x61/0x460 [ptlrpc]
01:52:28: [&amp;lt;ffffffffa058975a&amp;gt;] lustre_common_put_super+0x28a/0xbf0 [obdclass]
01:52:28: [&amp;lt;ffffffffa05bc508&amp;gt;] server_put_super+0x198/0xe50 [obdclass]
01:52:29: [&amp;lt;ffffffff8118b23b&amp;gt;] generic_shutdown_super+0x5b/0xe0
01:52:29: [&amp;lt;ffffffff8118b326&amp;gt;] kill_anon_super+0x16/0x60
01:52:29: [&amp;lt;ffffffffa0580d06&amp;gt;] lustre_kill_super+0x36/0x60 [obdclass]
01:52:29: [&amp;lt;ffffffff8118bac7&amp;gt;] deactivate_super+0x57/0x80
01:52:29: [&amp;lt;ffffffff811ab4cf&amp;gt;] mntput_no_expire+0xbf/0x110
01:52:29: [&amp;lt;ffffffff811ac01b&amp;gt;] sys_umount+0x7b/0x3a0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="96451" author="di.wang" created="Wed, 15 Oct 2014 22:09:10 +0000"  >&lt;p&gt;Just updated the patch.&lt;/p&gt;</comment>
                            <comment id="98038" author="sergey" created="Fri, 31 Oct 2014 09:57:54 +0000"  >&lt;p&gt;Hello&lt;/p&gt;

&lt;p&gt;We hit these problem in xyratex and have another solution &lt;a href=&quot;http://review.whamcloud.com/#/c/12515/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/12515/&lt;/a&gt;.&lt;br/&gt;
Hope it could be helpful.&lt;/p&gt;</comment>
                            <comment id="99401" author="gerrit" created="Mon, 17 Nov 2014 20:02:53 +0000"  >&lt;p&gt;Sergey Cheremencev (sergey_cheremencev@xyratex.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/12515&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/12515&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5420&quot; title=&quot;Failure on test suite sanity test_17m: mount MDS failed, Input/output error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5420&quot;&gt;&lt;del&gt;LU-5420&lt;/del&gt;&lt;/a&gt; mgc: process config logs only in mgc_requeue_thread()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 3&lt;br/&gt;
Commit: 1c3148dd8645cfa94bf3c36cfbe41176334ad4c5&lt;/p&gt;</comment>
                            <comment id="99439" author="yujian" created="Mon, 17 Nov 2014 23:40:15 +0000"  >&lt;p&gt;While running replay-dual tests on master branch with MDSCOUNT=4, the same failure occurred:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/33dfc794-6dba-11e4-9d65-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/33dfc794-6dba-11e4-9d65-5254006e85c2&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/5cb7b7f8-6dba-11e4-9d65-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/5cb7b7f8-6dba-11e4-9d65-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="99465" author="sergey" created="Tue, 18 Nov 2014 09:29:26 +0000"  >&lt;p&gt;In seagate these bug is occurred only when MDT and MGS are on same node, in case when mdt starts earlier than mgs.&lt;/p&gt;

&lt;p&gt;When MDT is on separate node it uses LOCAL configuration if can&apos;t retrieve it from MGS.&lt;br/&gt;
But when MDT and MGS are on same node MDT can&apos;t use LOCAL configuration:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;        /* Copy the setup log locally if we can. Don&apos;t mess around if we&apos;re
         * running an MGS though (logs are already local). */
        if (lctxt &amp;amp;&amp;amp; lsi &amp;amp;&amp;amp; IS_SERVER(lsi) &amp;amp;&amp;amp; !IS_MGS(lsi) &amp;amp;&amp;amp;
            cli-&amp;gt;cl_mgc_configs_dir != NULL &amp;amp;&amp;amp;
            lu2dt_dev(cli-&amp;gt;cl_mgc_configs_dir-&amp;gt;do_lu.lo_dev) ==
            lsi-&amp;gt;lsi_dt_dev) {
....
       } else {
                if (local_only) /* no local log at client side */
                        GOTO(out_pop, rc = -EIO);
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="99488" author="di.wang" created="Tue, 18 Nov 2014 17:54:08 +0000"  >&lt;p&gt;Hmm, I thought there are two problems here&lt;br/&gt;
1. it is not just MGS and MDT are not share the same node, if several targets are sharing the same mgc, you will meet similar problem, because after  &lt;a href=&quot;http://review.whamcloud.com/#/c/9967&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9967&lt;/a&gt; is landed, we can not make sure the import is FULL before mgc enqueue lock and retrieve logs, unless the MGC is new.&lt;br/&gt;
2. How can we make sure the local config log is stale or not. I think that is the reason we saw &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5658&quot; title=&quot;sanity test_17n: destroy remote dir error 0&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5658&quot;&gt;&lt;del&gt;LU-5658&lt;/del&gt;&lt;/a&gt;, where the local config is stale.&lt;/p&gt;</comment>
                            <comment id="106051" author="simmonsja" created="Fri, 6 Feb 2015 17:50:50 +0000"  >&lt;p&gt;As a note I don&apos;t see this is my regular RHEL testing but I can constantly reproduce this problem with my 3.12 kernel setup. This is with the MGS and MDS each being on separate nodes.&lt;/p&gt;</comment>
                            <comment id="106307" author="gerrit" created="Mon, 9 Feb 2015 19:02:33 +0000"  >&lt;p&gt;Alexey Lyashkov (alexey.lyashkov@seagate.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13693&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13693&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5420&quot; title=&quot;Failure on test suite sanity test_17m: mount MDS failed, Input/output error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5420&quot;&gt;&lt;del&gt;LU-5420&lt;/del&gt;&lt;/a&gt; mgc: fix reconnect&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: ccfca18ad2ae9acb84dbfc4c0b2217bd10a0589d&lt;/p&gt;</comment>
                            <comment id="107147" author="jlevi" created="Tue, 17 Feb 2015 18:46:36 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/12515/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/12515/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="107540" author="gerrit" created="Fri, 20 Feb 2015 19:43:50 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13832&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13832&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5420&quot; title=&quot;Failure on test suite sanity test_17m: mount MDS failed, Input/output error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5420&quot;&gt;&lt;del&gt;LU-5420&lt;/del&gt;&lt;/a&gt; revert part of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4913&quot; title=&quot;mgc import reconnect race&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4913&quot;&gt;&lt;del&gt;LU-4913&lt;/del&gt;&lt;/a&gt;&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 260e150f98f07fa68fb124348ca9540e77fed100&lt;/p&gt;</comment>
                            <comment id="107626" author="gerrit" created="Sun, 22 Feb 2015 19:02:45 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13838&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13838&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5420&quot; title=&quot;Failure on test suite sanity test_17m: mount MDS failed, Input/output error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5420&quot;&gt;&lt;del&gt;LU-5420&lt;/del&gt;&lt;/a&gt; revert part of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4913&quot; title=&quot;mgc import reconnect race&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4913&quot;&gt;&lt;del&gt;LU-4913&lt;/del&gt;&lt;/a&gt;&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_7&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 77856caa2468dd69cfa5796bceb22c32aacf402f&lt;/p&gt;</comment>
                            <comment id="108198" author="gerrit" created="Fri, 27 Feb 2015 05:59:13 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/13838/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13838/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5420&quot; title=&quot;Failure on test suite sanity test_17m: mount MDS failed, Input/output error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5420&quot;&gt;&lt;del&gt;LU-5420&lt;/del&gt;&lt;/a&gt; ptlrpc: revert ptlrpc_reconnect_import() changes&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_7&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 02739a078f54b5ccdf49456fd0d1daea90472a8d&lt;/p&gt;</comment>
                            <comment id="108225" author="jlevi" created="Fri, 27 Feb 2015 12:58:41 +0000"  >&lt;p&gt;Patches landed to Master.&lt;/p&gt;</comment>
                            <comment id="108226" author="pjones" created="Fri, 27 Feb 2015 13:00:47 +0000"  >&lt;p&gt;Actually Jodi the patches for master are still in flight. It is simply a workaround fix that has landed to b2_7&lt;/p&gt;</comment>
                            <comment id="108229" author="jlevi" created="Fri, 27 Feb 2015 13:41:22 +0000"  >&lt;p&gt;Yes, my apologies.&lt;/p&gt;</comment>
                            <comment id="109975" author="simmonsja" created="Wed, 18 Mar 2015 13:33:22 +0000"  >&lt;p&gt;I see many patches for this. Which patches are valid?&lt;/p&gt;</comment>
                            <comment id="110017" author="di.wang" created="Wed, 18 Mar 2015 17:27:53 +0000"  >&lt;p&gt;For now, you can use this &lt;a href=&quot;http://review.whamcloud.com/13838/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13838/&lt;/a&gt; , but that only reverts the patch  (&lt;a href=&quot;http://review.whamcloud.com/#/c/9967/)which&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9967/)which&lt;/a&gt; cause the problem, and not real fix. &lt;/p&gt;

&lt;p&gt;There are patches trying to fix this problem, but none of them are satisfied by everyone. So leave it to 2.8 for now.&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/13693&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13693&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/11258&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11258&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="114432" author="gerrit" created="Wed, 6 May 2015 21:20:04 +0000"  >&lt;p&gt;Andreas Dilger (andreas.dilger@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/11258/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11258/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5420&quot; title=&quot;Failure on test suite sanity test_17m: mount MDS failed, Input/output error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5420&quot;&gt;&lt;del&gt;LU-5420&lt;/del&gt;&lt;/a&gt; mgc: MGC should retry for invalid import&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 77d406a0699307e8e633ef41f8984f45c09db9b8&lt;/p&gt;</comment>
                            <comment id="114894" author="pjones" created="Mon, 11 May 2015 17:36:01 +0000"  >&lt;p&gt;Landed for 2.8&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="24235">LU-4913</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="25720">LU-5404</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="24748">LU-5077</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="25727">LU-5407</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="24985">LU-5130</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="37208">LU-8206</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwse7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15076</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>