<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:24:42 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16176] replay-dual test_26: timeout &quot;Network is sluggish?&quot;</title>
                <link>https://jira.whamcloud.com/browse/LU-16176</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for eaujames &amp;lt;eaujames@ddn.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/aab59261-7e97-4100-80ca-8ce857853721&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/aab59261-7e97-4100-80ca-8ce857853721&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_26 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Timeout occurred after 125 minutes, last suite running was replay-dual
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Test log:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;11:57:35 (1663329455) targets are mounted
11:57:35 (1663329455) facet_failover done
CMD: onyx-99vm1.onyx.whamcloud.com,onyx-99vm2 PATH=/usr/lib64/lustre/tests:/usr/lib/lustre/tests:/usr/lib64/lustre/tests:/opt/iozone/bin:/opt/iozone/bin:/opt/iozone/bin:/usr/lib64/lustre/tests/mpi:/usr/lib64/lustre/tests/racer:/usr/lib64/lustre/../lustre-iokit/sgpdd-survey:/usr/lib64/lustre/tests:/usr/lib64/lustre/utils/gss:/usr/lib64/lustre/utils:/usr/lib64/openmpi/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/sbin:/sbin:/bin::/sbin:/bin:/usr/sbin: NAME=autotest_config bash rpc.sh wait_import_state_mount \(FULL\|IDLE\) mdc.lustre-MDT0000-mdc-*.mds_server_uuid 
onyx-99vm1: onyx-99vm1.onyx.whamcloud.com: executing wait_import_state_mount (FULL|IDLE) mdc.lustre-MDT0000-mdc-*.mds_server_uuid
onyx-99vm2: onyx-99vm2.onyx.whamcloud.com: executing wait_import_state_mount (FULL|IDLE) mdc.lustre-MDT0000-mdc-*.mds_server_uuid
onyx-99vm1: CMD: onyx-99vm1.onyx.whamcloud.com lctl get_param -n at_max
onyx-99vm1: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 0 sec
onyx-99vm2: CMD: onyx-99vm2.onyx.whamcloud.com lctl get_param -n at_max
onyx-99vm2: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 0 sec
/usr/lib64/lustre/tests/replay-dual.sh: line 936: 119703 Killed                  ( local tar_dir=$DIR/$tdir/run_tar; while true; do
    test_mkdir -p -c$MDSCOUNT $tar_dir || break; if [ $MDSCOUNT -ge 2 ]; then
        $LFS setdirstripe -D -c$MDSCOUNT $tar_dir || error &quot;set default dirstripe failed&quot;;
    fi; cd $tar_dir || break; tar cf - /etc | tar xf - || error &quot;tar failed&quot;; cd $DIR/$tdir || break; rm -rf $tar_dir || break;
done )
/usr/lib64/lustre/tests/replay-dual.sh: line 936: 119704 Killed                  ( local dbench_dir=$DIR2/$tdir/run_dbench; while true; do
    test_mkdir -p -c$MDSCOUNT $dbench_dir || break; if [ $MDSCOUNT -ge 2 ]; then
        $LFS setdirstripe -D -c$MDSCOUNT $dbench_dir || error &quot;set default dirstripe failed&quot;;
    fi; cd $dbench_dir || break; rundbench 1 -D $dbench_dir -t 100 &amp;amp;&amp;gt; /dev/null || break; cd $DIR/$tdir || break; rm -rf $dbench_dir || break;
done )
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Client hang + evictions:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[Fri Sep 16 11:57:02 2022] Lustre: DEBUG MARKER: test_26 fail mds1 5 times
[Fri Sep 16 11:57:28 2022] LustreError: 128420:0:(ldlm_resource.c:1125:ldlm_resource_complain()) MGC10.240.28.47@tcp: namespace resource [0x65727473756c:0x2:0x0].0x0 (0000000042d378ae) refcount nonzero (2) after lock cleanup; forcing cleanup.
[Fri Sep 16 11:57:35 2022] Lustre: DEBUG MARKER: PATH=/usr/lib64/lustre/tests:/usr/lib/lustre/tests:/usr/lib64/lustre/tests:/opt/iozone/bin:/opt/iozone/bin:/opt/iozone/bin:/usr/lib64/lustre/tests/mpi:/usr/lib64/lustre/tests/racer:/usr/lib64/lustre/../lustre-iokit/sgpdd-survey:/usr/lib64/lustre/tests:/us
[Fri Sep 16 11:57:37 2022] Lustre: DEBUG MARKER: /usr/sbin/lctl mark onyx-99vm1.onyx.whamcloud.com: executing wait_import_state_mount \(FULL\|IDLE\) mdc.lustre-MDT0000-mdc-\*.mds_server_uuid
[Fri Sep 16 11:57:37 2022] Lustre: DEBUG MARKER: onyx-99vm1.onyx.whamcloud.com: executing wait_import_state_mount (FULL|IDLE) mdc.lustre-MDT0000-mdc-*.mds_server_uuid
[Fri Sep 16 11:57:37 2022] Lustre: DEBUG MARKER: lctl get_param -n at_max
[Fri Sep 16 11:57:38 2022] Lustre: DEBUG MARKER: /usr/sbin/lctl mark mdc.lustre-MDT0000-mdc-\*.mds_server_uuid in FULL state after 0 sec
[Fri Sep 16 11:57:38 2022] Lustre: DEBUG MARKER: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 0 sec
[Fri Sep 16 11:57:58 2022] LustreError: 167-0: lustre-OST0000-osc-ffff970f6a66e000: This client was evicted by lustre-OST0000; in progress operations using this service will fail.
[Fri Sep 16 11:58:19 2022] LustreError: 129930:0:(import.c:355:ptlrpc_invalidate_import()) lustre-OST0003_UUID: timeout waiting for callback (1 != 0)
[Fri Sep 16 11:58:19 2022] LustreError: 129930:0:(import.c:378:ptlrpc_invalidate_import()) @@@ still on sending list  req@00000000bafea35b x1744123210643072/t0(0) o4-&amp;gt;lustre-OST0003-osc-ffff970f6a66e000@10.240.28.46@tcp:6/4 lens 4584/448 e 0 to 0 dl 1663329411 ref 2 fl Rpc:REQ/0/ffffffff rc -5/-1 job:&apos;dbench.0&apos;
[Fri Sep 16 11:58:19 2022] LustreError: 129930:0:(import.c:389:ptlrpc_invalidate_import()) lustre-OST0003_UUID: Unregistering RPCs found (0). Network is sluggish? Waiting for them to error out.
[Fri Sep 16 11:58:27 2022] INFO: task kworker/u4:2:75980 blocked for more than 120 seconds.
[Fri Sep 16 11:58:27 2022]       Tainted: G           OE    --------- -  - 4.18.0-348.7.1.el8_5.x86_64 #1
[Fri Sep 16 11:58:27 2022] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
[Fri Sep 16 11:58:27 2022] task:kworker/u4:2    state:D stack:    0 pid:75980 ppid:     2 flags:0x80004080
[Fri Sep 16 11:58:27 2022] Workqueue: writeback wb_workfn (flush-lustre-ffff970f6a)
[Fri Sep 16 11:58:27 2022] Call Trace:
[Fri Sep 16 11:58:27 2022]  __schedule+0x2bd/0x760
[Fri Sep 16 11:58:27 2022]  schedule+0x37/0xa0
[Fri Sep 16 11:58:27 2022]  rwsem_down_read_slowpath+0x360/0x3d0
[Fri Sep 16 11:58:27 2022]  lov_io_init+0xbe/0x330 [lov]
[Fri Sep 16 11:58:27 2022]  cl_io_init0.isra.14+0x86/0x150 [obdclass]
[Fri Sep 16 11:58:27 2022]  cl_sync_file_range+0x247/0x340 [lustre]
[Fri Sep 16 11:58:27 2022]  ll_writepages+0x70/0x1e0 [lustre]
[Fri Sep 16 11:58:27 2022]  do_writepages+0x41/0xd0
[Fri Sep 16 11:58:27 2022]  __writeback_single_inode+0x39/0x2f0
[Fri Sep 16 11:58:27 2022]  writeback_sb_inodes+0x1e6/0x450
[Fri Sep 16 11:58:27 2022]  __writeback_inodes_wb+0x5f/0xc0
[Fri Sep 16 11:58:27 2022]  wb_writeback+0x25b/0x2f0
[Fri Sep 16 11:58:27 2022]  ? get_nr_inodes+0x35/0x50
[Fri Sep 16 11:58:27 2022]  wb_workfn+0x37a/0x4c0
[Fri Sep 16 11:58:27 2022]  ? __switch_to_asm+0x35/0x70
[Fri Sep 16 11:58:27 2022]  ? __switch_to_asm+0x41/0x70
[Fri Sep 16 11:58:27 2022]  ? __switch_to_asm+0x35/0x70
[Fri Sep 16 11:58:27 2022]  ? __switch_to_asm+0x41/0x70
[Fri Sep 16 11:58:27 2022]  ? __switch_to_asm+0x35/0x70
[Fri Sep 16 11:58:27 2022]  ? __switch_to_asm+0x41/0x70
[Fri Sep 16 11:58:27 2022]  ? __switch_to_asm+0x35/0x70
[Fri Sep 16 11:58:27 2022]  ? __switch_to_asm+0x41/0x70
[Fri Sep 16 11:58:27 2022]  process_one_work+0x1a7/0x360
[Fri Sep 16 11:58:27 2022]  worker_thread+0x30/0x390
[Fri Sep 16 11:58:27 2022]  ? create_worker+0x1a0/0x1a0
[Fri Sep 16 11:58:27 2022]  kthread+0x116/0x130
[Fri Sep 16 11:58:27 2022]  ? kthread_flush_work_fn+0x10/0x10
[Fri Sep 16 11:58:27 2022]  ret_from_fork+0x35/0x40
[Fri Sep 16 11:58:39 2022] LustreError: 129930:0:(import.c:355:ptlrpc_invalidate_import()) lustre-OST0003_UUID: timeout waiting for callback (1 != 0)
[Fri Sep 16 11:58:39 2022] LustreError: 129930:0:(import.c:378:ptlrpc_invalidate_import()) @@@ still on sending list  req@00000000bafea35b x1744123210643072/t0(0) o4-&amp;gt;lustre-OST0003-osc-ffff970f6a66e000@10.240.28.46@tcp:6/4 lens 4584/448 e 0 to 0 dl 1663329411 ref 2 fl Rpc:REQ/0/ffffffff rc -5/-1 job:&apos;dbench.0&apos;
[Fri Sep 16 11:58:39 2022] LustreError: 129930:0:(import.c:389:ptlrpc_invalidate_import()) lustre-OST0003_UUID: Unregistering RPCs found (0). Network is sluggish? Waiting for them to error out.
[Fri Sep 16 11:59:00 2022] LustreError: 129930:0:(import.c:355:ptlrpc_invalidate_import()) lustre-OST0003_UUID: timeout waiting for callback (1 != 0)
[Fri Sep 16 11:59:00 2022] LustreError: 129930:0:(import.c:378:ptlrpc_invalidate_import()) @@@ still on sending list  req@00000000bafea35b x1744123210643072/t0(0) o4-&amp;gt;lustre-OST0003-osc-ffff970f6a66e000@10.240.28.46@tcp:6/4 lens 4584/448 e 0 to 0 dl 1663329411 ref 2 fl Rpc:REQ/0/ffffffff rc -5/-1 job:&apos;dbench.0&apos;
[Fri Sep 16 11:59:00 2022] LustreError: 129930:0:(import.c:389:ptlrpc_invalidate_import()) lustre-OST0003_UUID: Unregistering RPCs found (0). Network is sluggish? Waiting for them to error out.
[Fri Sep 16 11:59:15 2022] LustreError: 129934:0:(ldlm_resource.c:1125:ldlm_resource_complain()) MGC10.240.28.47@tcp: namespace resource [0x65727473756c:0x2:0x0].0x0 (0000000042d378ae) refcount nonzero (1) after lock cleanup; forcing cleanup.
[Fri Sep 16 11:59:20 2022] LustreError: 129930:0:(import.c:355:ptlrpc_invalidate_import()) lustre-OST0003_UUID: timeout waiting for callback (1 != 0)
[Fri Sep 16 11:59:20 2022] LustreError: 129930:0:(import.c:378:ptlrpc_invalidate_import()) @@@ still on sending list  req@00000000bafea35b x1744123210643072/t0(0) o4-&amp;gt;lustre-OST0003-osc-ffff970f6a66e000@10.240.28.46@tcp:6/4 lens 4584/448 e 0 to 0 dl 1663329411 ref 2 fl Rpc:REQ/0/ffffffff rc -5/-1 job:&apos;dbench.0&apos;
[Fri Sep 16 11:59:20 2022] LustreError: 129930:0:(import.c:389:ptlrpc_invalidate_import()) lustre-OST0003_UUID: Unregistering RPCs found (0). Network is sluggish? Waiting for them to error out.
[Fri Sep 16 11:59:30 2022] LustreError: 167-0: lustre-OST0004-osc-ffff970f6a66e000: This client was evicted by lustre-OST0004; in progress operations using this service will fail.
[Fri Sep 16 11:59:30 2022] LustreError: Skipped 3 previous similar messages
[Fri Sep 16 11:59:41 2022] LustreError: 129930:0:(import.c:355:ptlrpc_invalidate_import()) lustre-OST0003_UUID: timeout waiting for callback (1 != 0)
[Fri Sep 16 11:59:41 2022] LustreError: 129930:0:(import.c:378:ptlrpc_invalidate_import()) @@@ still on sending list  req@00000000bafea35b x1744123210643072/t0(0) o4-&amp;gt;lustre-OST0003-osc-ffff970f6a66e000@10.240.28.46@tcp:6/4 lens 4584/448 e 0 to 0 dl 1663329411 ref 2 fl Rpc:REQ/0/ffffffff rc -5/-1 job:&apos;dbench.0&apos;
[Fri Sep 16 11:59:41 2022] LustreError: 129930:0:(import.c:389:ptlrpc_invalidate_import()) lustre-OST0003_UUID: Unregistering RPCs found (0). Network is sluggish? Waiting for them to error out.
[Fri Sep 16 12:00:01 2022] LustreError: 129930:0:(import.c:355:ptlrpc_invalidate_import()) lustre-OST0003_UUID: timeout waiting for callback (1 != 0)
[Fri Sep 16 12:00:01 2022] LustreError: 129930:0:(import.c:378:ptlrpc_invalidate_import()) @@@ still on sending list  req@00000000bafea35b x1744123210643072/t0(0) o4-&amp;gt;lustre-OST0003-osc-ffff970f6a66e000@10.240.28.46@tcp:6/4 lens 4584/448 e 0 to 0 dl 1663329411 ref 2 fl Rpc:REQ/0/ffffffff rc -5/-1 job:&apos;dbench.0&apos;
[Fri Sep 16 12:00:01 2022] LustreError: 129930:0:(import.c:389:ptlrpc_invalidate_import()) lustre-OST0003_UUID: Unregistering RPCs found (0). Network is sluggish? Waiting for them to error out.
[Fri Sep 16 12:00:22 2022] LustreError: 129930:0:(import.c:355:ptlrpc_invalidate_import()) lustre-OST0003_UUID: timeout waiting for callback (1 != 0)
[Fri Sep 16 12:00:22 2022] LustreError: 129930:0:(import.c:378:ptlrpc_invalidate_import()) @@@ still on sending list  req@00000000bafea35b x1744123210643072/t0(0) o4-&amp;gt;lustre-OST0003-osc-ffff970f6a66e000@10.240.28.46@tcp:6/4 lens 4584/448 e 0 to 0 dl 1663329411 ref 2 fl Rpc:REQ/0/ffffffff rc -5/-1 job:&apos;dbench.0&apos;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Some test sessions seem to have similar symptoms:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/sub_tests/c66f8021-f1c7-4a32-aeb4-c4089e285d40&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/sub_tests/c66f8021-f1c7-4a32-aeb4-c4089e285d40&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/sub_tests/0cc71756-9e3d-435a-b9cc-abdfb7092970&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/sub_tests/0cc71756-9e3d-435a-b9cc-abdfb7092970&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/sub_tests/3ee2a550-fc29-4096-bb82-b15dc3dbb151&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/sub_tests/3ee2a550-fc29-4096-bb82-b15dc3dbb151&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/sub_tests/aa2fae70-180d-491a-82d0-aa5b65482493&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/sub_tests/aa2fae70-180d-491a-82d0-aa5b65482493&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/sub_tests/89610b7b-87e2-48de-9f3d-ba73a946f08c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/sub_tests/89610b7b-87e2-48de-9f3d-ba73a946f08c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
replay-dual test_26 - Timeout occurred after 125 minutes, last suite running was replay-dual&lt;/p&gt;</description>
                <environment></environment>
        <key id="72439">LU-16176</key>
            <summary>replay-dual test_26: timeout &quot;Network is sluggish?&quot;</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Tue, 20 Sep 2022 07:56:36 +0000</created>
                <updated>Tue, 20 Sep 2022 08:34:45 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>1</watches>
                                                                                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i030p3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>