<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:34:11 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-17286] recovery-small test_66 timeout</title>
                <link>https://jira.whamcloud.com/browse/LU-17286</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for S Buisson &amp;lt;sbuisson@ddn.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/8bc8339c-402c-46d0-8b05-420e0f7ead5a&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/8bc8339c-402c-46d0-8b05-420e0f7ead5a&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_66 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Timeout occurred after 166 minutes, last suite running was recovery-small
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Test session details:&lt;br/&gt;
clients: &lt;a href=&quot;https://build.whamcloud.com/job/lustre-reviews/100300&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://build.whamcloud.com/job/lustre-reviews/100300&lt;/a&gt; - 4.18.0-477.21.1.el8_8.x86_64&lt;br/&gt;
servers: &lt;a href=&quot;https://build.whamcloud.com/job/lustre-reviews/100300&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://build.whamcloud.com/job/lustre-reviews/100300&lt;/a&gt; - 4.18.0-477.21.1.el8_lustre.x86_64&lt;/p&gt;

&lt;p&gt;Test output ends with:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;CMD: onyx-49vm6 /usr/sbin/lctl set_param -n mdt.lustre-MDT0000.evict_client 93213b02-0ca9-4e1e-8437-34d3c201a0a9
/usr/lib64/lustre/tests/test-framework.sh: line 3718: /mnt/lustre/recon: Input/output error
CMD: onyx-49vm1.onyx.whamcloud.com,onyx-49vm2 /usr/bin/lfs df /mnt/lustre; uname -n &amp;gt;&amp;gt; /mnt/lustre/recon
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;






&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
recovery-small test_66 - Timeout occurred after 166 minutes, last suite running was recovery-small&lt;/p&gt;</description>
                <environment></environment>
        <key id="78924">LU-17286</key>
            <summary>recovery-small test_66 timeout</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Mon, 13 Nov 2023 17:43:54 +0000</created>
                <updated>Tue, 9 Jan 2024 13:47:07 +0000</updated>
                            <resolved>Tue, 9 Jan 2024 10:37:49 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="394656" author="adilger" created="Tue, 28 Nov 2023 22:52:32 +0000"  >&lt;p&gt;It looks like this issue is related to GSS, with the client being evicted from the MDS (deliberately), getting an RPC error from the OSS, and then never making further progress.&lt;/p&gt;

&lt;p&gt;MDS logs show:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 4532.190289] Lustre: DEBUG MARKER: == recovery-small test 66: lock enqueue re-send vs client eviction === 05:45:08 (1701150308)
[ 4532.458517] Lustre: DEBUG MARKER: lctl set_param fail_loc=0x80000157
[ 4532.670053] Lustre: *** cfs_fail_loc=157, val=2147483648***
[ 4532.671470] LustreError: 14380:0:(ldlm_lib.c:3278:target_send_reply_msg()) @@@ dropping reply  req@00000000e6ef81e4 x1783780844763008/t0(0) o101-&amp;gt;6f45c8eb-3f53-49b5-bd8e-541944a4c347@10.240.41.232@tcp:219/0 lens 576/688 e 0 to 0 dl 1701150364 ref 1 fl Interpret:/200/0 rc 0/0 job:&apos;stat.0&apos; uid:0 gid:0
[ 4534.620423] Lustre: lustre-MDT0000: Client 6f45c8eb-3f53-49b5-bd8e-541944a4c347 (at 10.240.41.232@tcp) reconnecting
[ 4534.622463] Lustre: Skipped 1 previous similar message
[ 4534.787142] Lustre: DEBUG MARKER: lctl set_param fail_loc=0
[ 4536.790311] Lustre: DEBUG MARKER: /usr/sbin/lctl set_param -n mdt.lustre-MDT0000.evict_client 6f45c8eb-3f53-49b5-bd8e-541944a4c347
[ 4536.954163] Lustre: 138089:0:(genops.c:1623:obd_export_evict_by_uuid()) lustre-MDT0000: evicting 6f45c8eb-3f53-49b5-bd8e-541944a4c347 at adminstrative request
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;OSS logs show:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 4532.043858] Lustre: DEBUG MARKER: == recovery-small test 66: lock enqueue re-send vs client eviction === 05:45:08 (1701150308)
[ 4534.268230] Lustre: DEBUG MARKER: /usr/sbin/lctl set_param fail_loc=0x80000136
[ 4538.317046] Lustre: 19301:0:(sec_gss.c:686:gss_cli_ctx_handle_err_notify()) req x1783781103120832/t0, ctx 00000000be17c5f3 idx 0x13da74db5b288b54(0-&amp;gt;c): reverseserver respond (00080000/00000000)
[ 4538.320271] Lustre: 19301:0:(sec_gss.c:686:gss_cli_ctx_handle_err_notify()) Skipped 1 previous similar message
[ 4538.322145] LustreError: 19301:0:(client.c:1491:after_reply()) @@@ unwrap reply failed: rc = -22  req@0000000093c1e5d9 x1783781103120832/t0(0) o105-&amp;gt;lustre-OST0000@10.240.41.232@tcp:15/16 lens 392/224 e 0 to 0 dl 1701150331 ref 1 fl Rpc:RQU/0/ffffffff rc 0/-1 job:&apos;&apos; uid:4294967295 gid:4294967295
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Client logs show (boot clock is offset by -87.25s based on &quot;&lt;tt&gt;test 66&lt;/tt&gt;&quot; line):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 4444.948121] Lustre: DEBUG MARKER: == recovery-small test 66: lock enqueue re-send vs client eviction === 05:45:08 (1701150308)
[ 4445.418200] Lustre: DEBUG MARKER: stat /mnt/lustre/f66.recovery-small
[ 4449.721569] LustreError: 167-0: lustre-MDT0000-mdc-ffff94079bcca000: This client was evicted by lustre-MDT0000; in progress operations using this service will fail.
[ 4449.903814] Lustre: DEBUG MARKER: /usr/bin/lfs df /mnt/lustre; uname -n &amp;gt;&amp;gt; /mnt/lustre/recon
[ 4451.221178] Lustre: 127393:0:(gss_svc_upcall.c:1620:gss_svc_upcall_get_ctx()) Invalid gss ctx idx 0x13da74db5b288b54 from 10.240.41.234@tcp
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="394659" author="adilger" created="Tue, 28 Nov 2023 23:05:13 +0000"  >&lt;p&gt;It looks like this subtest started failing on 2023-11-13 (2.15.58.101, master only) for several &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17173&quot; title=&quot;Cleanup lfs flushctx&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17173&quot;&gt;&lt;del&gt;LU-17173&lt;/del&gt;&lt;/a&gt; patches, but they haven&apos;t landed yet.&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/search?horizon=2332800&amp;amp;status%5B%5D=TIMEOUT&amp;amp;test_set_script_id=f36cabd0-32c3-11e0-a61c-52540025f9ae&amp;amp;sub_test_script_id=e7e8f4f4-3a66-11e4-b82a-5254006e85c2&amp;amp;source=sub_tests#redirect&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/search?horizon=2332800&amp;amp;status%5B%5D=TIMEOUT&amp;amp;test_set_script_id=f36cabd0-32c3-11e0-a61c-52540025f9ae&amp;amp;sub_test_script_id=e7e8f4f4-3a66-11e4-b82a-5254006e85c2&amp;amp;source=sub_tests#redirect&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;A fair number of patches were landed on 2023-11-08 and tagged 2.15.59.0, so it isn&apos;t clear which patch may have introduced the problem.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;6521c313f7  New tag 2.15.59
6a6e4ee20f LU-17184 mgc: remove damaged local configs
21295b169b LU-17213 llite: check sdio before freeing it
87ca3cffe6 LU-17259 lnet: kgnilnd_nl_get should return 0 on success
5afe3b0538 LU-17258 socklnd: ensure connection type established upon race
c4c9a8eea3 LU-17256 debian: allow building client dkms on arm64
982eca73a9 LU-17000 coverity: Fix Logically dead code under liblnetconfig.c
b83156304d LU-17254 lnet: Fix ofed detection with specific kernel version
9ba375983d LU-17249 ptlrpc: protect scp_rqbd_idle list operations
ee56161ea0 LU-17000 coverity: Fix Dereference after null under client.c
edb968d04f LU-17242 debug: remove CFS_CHECK_STACK
4b290188fb LU-16868 tests: skip conf-sanity/32 in interop
37a50f74da LU-16796 lfsck: Change lfsck_assistant_object to use kref
d7e3e7c104 LU-16796 target: Change struct job_stat to use kref
a12c352a3d LU-17205 utils: add lctl get_param -H option
8fa3532b1e LU-17204 lod: don&apos;t panic on short LOVEA
f2f8b6deaf LU-17203 libcfs: ignore remaining items
1759ae751a LU-16796 ofd: Change struct ofd_seq to use refcount_t
e420e92ac9 LU-17196 tests: sanity-lnet test_310 MR support
6aede12548 LU-16518 rsync: fix new clang error in lustre_rsync.c
36b14a23a6 LU-17207 lnet: race b/w monitor thr stop and discovery push
1b694ad04f LU-16896 flr: resync could mess mirror content
d1fadf8e8a LU-17132 kernel: update RHEL 8.8 [4.18.0-477.27.1.el8_8]
67e0d9e40a LU-17191 osc: only call xa_insert for new entries
ee0e9447e7 LU-17115 quota: fix race of acquiring locks in qmt
ecea24d843 LU-17071 o2iblnd: IBLND_REJECT_EARLY condition causes Oops
05c97b1096 LU-17232 build: fix ext4-misc for el7.6 server
24d515367f LU-9859 libcfs: migrate libcfs_mem.c to lnet/lib-mem.c
b0cc96a1ff LU-17131 ldiskfs: el9.2 encdata and filename-encode
57ac32a223 LU-16097 quota: release preacquired quota when over limits
6fbffd9c09 LU-14361 statahead: add tunable for fname pattern statahead
753e058b4c LU-4974 lod: Change pool_desc to &quot;[lod|lov]_pool_desc&quot;
cb5f92c0e3 LU-10391 ksocklnd: use ksocknal_protocol v4 for IPv6
02b22df643 LU-17235 o2iblnd: adding alias ib interface causes crash
6ad9ef1fec LU-17225 utils: check_iam print records
b5fde4d6c0 LU-17197 obdclass: preserve fairness when waiting for rpc slot
68254c484a LU-10391 lnet: handle discovery with Netlink
4512347d6c LU-16356 hsm: add running ref to the coordinator
a772e90243 LU-16032 osd: move unlink of large objects to separate thread
2c97684db9 LU-17181 misc: don&apos;t block reclaim threads
a8e66b899a LU-17103 lnet: use workqueue for lnd ping buffer updates
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="394712" author="eaujames" created="Wed, 29 Nov 2023 09:21:00 +0000"  >&lt;p&gt;I hit the same type of issue on &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17228&quot; title=&quot;sanity test_36i: expect 200 got 1698166494&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17228&quot;&gt;&lt;del&gt;LU-17228&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="398932" author="sebastien" created="Tue, 9 Jan 2024 07:55:18 +0000"  >&lt;p&gt;Issue fixed with landing of patch &quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17317&quot; title=&quot;sanity-sec test_16: test all_off:60001:c0:60003:003, wanted 1 1, got 0 0&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17317&quot;&gt;&lt;del&gt;LU-17317&lt;/del&gt;&lt;/a&gt; gss: do not continue using expired reverse context&quot; &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/53375&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/53375&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="398938" author="adilger" created="Tue, 9 Jan 2024 09:09:34 +0000"  >&lt;p&gt;Patch included in 2.15.60.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="79199">LU-17317</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="78573">LU-17228</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i041pr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>