<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:21:30 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1999] recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds</title>
                <link>https://jira.whamcloud.com/browse/LU-1999</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/9d79385c-038b-11e2-83cf-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/9d79385c-038b-11e2-83cf-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_106 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;CMD: client-24vm3 lctl get_param -n *.lustre-MDT0000.num_exports | cut -d&apos; &apos; -f2&lt;br/&gt;
Update not seen after 60s: wanted &apos;3&apos; got &apos;4&apos;&lt;br/&gt;
 recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds &lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: recovery-small 106&lt;/p&gt;</description>
                <environment></environment>
        <key id="16054">LU-1999</key>
            <summary>recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="johann">Johann Lombardi</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Thu, 20 Sep 2012 21:44:59 +0000</created>
                <updated>Thu, 18 Apr 2013 20:45:09 +0000</updated>
                            <resolved>Thu, 18 Oct 2012 15:55:08 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="45312" author="green" created="Thu, 20 Sep 2012 21:45:54 +0000"  >&lt;p&gt;Johann, this failure comes from a test added by you yesterday&lt;/p&gt;</comment>
                            <comment id="45319" author="johann" created="Fri, 21 Sep 2012 02:19:45 +0000"  >&lt;p&gt;It looks like a test issue since the client got evicted as expected:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LustreError: 167-0: lustre-MDT0000-mdc-ffff880078abb000: This client was evicted by lustre-MDT0000; in progress operations using this service will fail.
Lustre: Evicted from MGS (at 10.10.4.118@tcp) after server handle changed from 0x1345e746bb7e9980 to 0x1345e746bb7e9edc
Lustre: MGC10.10.4.118@tcp: Reactivating import
Lustre: DEBUG MARKER: /usr/sbin/lctl mark  recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds 
Lustre: DEBUG MARKER: recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mdd
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The issue is that the client reconnected before the check in the test was made.&lt;/p&gt;</comment>
                            <comment id="45320" author="johann" created="Fri, 21 Sep 2012 02:53:45 +0000"  >&lt;p&gt;I have disabled the test on master until i come up with a test script fix to avoid those failures.&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/4066&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4066&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="45329" author="johann" created="Fri, 21 Sep 2012 05:26:50 +0000"  >&lt;p&gt;Tentative patch:&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/4069&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4069&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="46177" author="johann" created="Mon, 8 Oct 2012 12:21:33 +0000"  >&lt;p&gt;Patch landed. No new reoccurrence since then.&lt;/p&gt;</comment>
                            <comment id="46501" author="green" created="Fri, 12 Oct 2012 23:19:59 +0000"  >&lt;p&gt;It seems that I am still hitting this in my local testing (i.e. SLOW=yes REFORMAT=yes sh recovery-small.sh ), though I cannot speak of the frequency.&lt;br/&gt;
This is with today&apos;s master checkout.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;== recovery-small test 106: lightweight connection support == 23:16:05 (1350098165)
fail_loc=0x805
Starting client: centos6-4.localnet: -o user_xattr,flock centos6-4.localnet@tcp:/lustre /mnt/lustre2
fail_loc=0
Filesystem           1K-blocks      Used Available Use% Mounted on
192.168.10.214@tcp:/lustre
                        374928     52384    302004  15% /mnt/lustre
Failing mds1 on node centos6-4.localnet
Stopping /mnt/mds1 (opts:) on centos6-4.localnet
affected facets: mds1
Failover mds1 to centos6-4.localnet
23:16:24 (1350098184) waiting for centos6-4.localnet network 900 secs ...
23:16:24 (1350098184) network interface is UP
Starting mds1:   -o loop /tmp/lustre-mdt1 /mnt/mds1
Started lustre-MDT0000
touch: setting times of `/mnt/lustre2/f.recovery-small.106&apos;: Input/output error
 recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds 
  Trace dump:
  = ./../tests/test-framework.sh:3844:error_noexit()
  = ./../tests/test-framework.sh:3866:error()
  = recovery-small.sh:1528:test_106()
  = ./../tests/test-framework.sh:4108:run_one()
  = ./../tests/test-framework.sh:4138:run_one_logged()
  = ./../tests/test-framework.sh:4009:run_test()
  = recovery-small.sh:1537:main()
Dumping lctl log to /tmp/test_logs/1350095359/recovery-small.test_106.*.1350098192.log
Dumping logs only on local client.
192.168.10.214@tcp:/lustre /mnt/lustre2 lustre rw,flock,user_xattr 0 0
Stopping client centos6-4.localnet /mnt/lustre2 (opts:)
FAIL 106 (32s)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="46502" author="green" created="Fri, 12 Oct 2012 23:39:48 +0000"  >&lt;p&gt;Just did a bit of research, it&apos;s happening, but not super frequently:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@intelbox ~]# grep &apos;lightweight client not evicted by mds&apos; /var/log/remotemessages 
Oct  9 03:43:14 centos6-0 kernel: [ 2489.388687] Lustre: DEBUG MARKER: recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds
Oct  9 13:27:35 centos6-1 kernel: [ 2898.929903] Lustre: DEBUG MARKER: recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds
Oct 11 13:33:30 centos6-0 kernel: [ 6723.295214] Lustre: DEBUG MARKER: recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds
Oct 12 23:16:32 centos6-4 kernel: [ 3261.398403] Lustre: DEBUG MARKER: recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="46553" author="johann" created="Mon, 15 Oct 2012 05:04:35 +0000"  >&lt;p&gt;Oleg, could you please attach dmesg output when the issue happened?&lt;/p&gt;</comment>
                            <comment id="46560" author="liwei" created="Mon, 15 Oct 2012 07:09:03 +0000"  >&lt;p&gt;I saw this with today&apos;s master plus my &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1717&quot; title=&quot;mdt_recovery.c:611:mdt_steal_ack_locks()) Resent req xid XXX has mismatched opc: new 101 old 0&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1717&quot;&gt;&lt;del&gt;LU-1717&lt;/del&gt;&lt;/a&gt; patch locally on a single-node setup:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;
== recovery-small test 106: lightweight connection support == 16:41:54 (1350290514)
fail_loc=0x805
Starting client: h221f: -o user_xattr,flock h221f@tcp:/lustre /mnt/lustre2
Failed to load ZFS module stack.
Load the module manually by running &apos;insmod &amp;lt;location&amp;gt;/zfs.ko&apos; as root.
fail_loc=0
Filesystem           1K-blocks      Used Available Use% Mounted on
h221f@tcp:/lustre       374928     51792    303136  15% /mnt/lustre
Failing mds1 on node h221f
Stopping /mnt/mds1 (opts:) on h221f
affected facets: mds1
Failover mds1 to h221f
16:42:05 (1350290525) waiting for h221f network 900 secs ...
16:42:05 (1350290525) network interface is UP
Starting mds1:   -o loop /tmp/lustre-mdt1 /mnt/mds1
Failed to load ZFS module stack.
Load the module manually by running &apos;insmod &amp;lt;location&amp;gt;/zfs.ko&apos; as root.
Started lustre-MDT0000
touch: setting times of `/mnt/lustre2/f.recovery-small.106&apos;: Input/output error
 recovery-small test_106: @@@@@@ FAIL: lightweight client not evicted by mds 
  Trace dump:
  = ./../tests/test-framework.sh:3869:error_noexit()
  = ./../tests/test-framework.sh:3891:error()
  = recovery-small.sh:1528:test_106()
  = ./../tests/test-framework.sh:4133:run_one()
  = ./../tests/test-framework.sh:4163:run_one_logged()
  = ./../tests/test-framework.sh:4034:run_test()
  = recovery-small.sh:1537:main()
Dumping lctl log to /tmp/test_logs/1350288504/recovery-small.test_106.*.1350290525.log
Dumping logs only on local client.
192.168.56.4@tcp:/lustre /mnt/lustre2 lustre rw,flock,user_xattr 0 0
Stopping client h221f /mnt/lustre2 (opts:)
sh: lsof: command not found
FAIL 106 (12s)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The logs will be attached in a minute.&lt;/p&gt;</comment>
                            <comment id="46572" author="johann" created="Mon, 15 Oct 2012 10:13:59 +0000"  >&lt;p&gt;hm, strange, the message is in the lustre log:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000100:02020000:0.0:1350290525.905481:0:6240:0:(import.c:1325:ptlrpc_import_recovery_state_machine()) 167-0: lustre-MDT0000-mdc-ffff880006762400: This client was evicted by lustre-MDT0000; in progress operations using this service will fail.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;but not in dmesg output ...&lt;/p&gt;</comment>
                            <comment id="46689" author="johann" created="Wed, 17 Oct 2012 17:18:43 +0000"  >&lt;p&gt;I can now reproduce. The issue comes from the console rate limit code which skips the messages expected by the test. &lt;/p&gt;</comment>
                            <comment id="46690" author="johann" created="Wed, 17 Oct 2012 17:44:28 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#change,4288&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4288&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="46750" author="johann" created="Thu, 18 Oct 2012 15:55:08 +0000"  >&lt;p&gt;Patch landed. Close the bug again.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="11968" name="lu-1999-logs.tar.bz2" size="124088" author="liwei" created="Mon, 15 Oct 2012 07:11:44 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv3j3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4071</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>