<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:36:44 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
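As an illustration (the issue XML view path below is assumed from JIRA's usual layout, not taken from this export), a full request might look like:
https://jira.whamcloud.com/si/jira.issueviews:issue-xml/LU-10622/LU-10622.xml?field=key&field=summary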
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10622] recovery-double-scale test_pairwise_fail: client OOM</title>
                <link>https://jira.whamcloud.com/browse/LU-10622</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;recovery-double-scale test_pairwise_fail - Timeout occurred after 484 mins, last suite running was recovery-double-scale, restarting cluster to continue tests&lt;br/&gt;
^^^^^^^^^^^^^ DO NOT REMOVE LINE ABOVE ^^^^^^^^^^^^^&lt;/p&gt;

&lt;p&gt;This issue was created by maloo for sarah_lw &amp;lt;wei3.liu@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/4932564a-fded-11e7-bd00-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/4932564a-fded-11e7-bd00-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_pairwise_fail failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Timeout occurred after 484 mins, last suite running was recovery-double-scale, restarting cluster to continue tests
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;&amp;lt;&amp;lt;Please provide additional information about the failure here&amp;gt;&amp;gt;&lt;/p&gt;</description>
        <environment></environment>
        <key id="50621">LU-10622</key>
        <summary>recovery-double-scale test_pairwise_fail: client OOM</summary>
        <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
        <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
        <statusCategory id="2" key="new" colorName="default"/>
        <resolution id="-1">Unresolved</resolution>
        <assignee username="wc-triage">WC Triage</assignee>
        <reporter username="sarah">Sarah Liu</reporter>
        <labels>
        </labels>
        <created>Wed, 7 Feb 2018 00:20:11 +0000</created>
        <updated>Thu, 3 Nov 2022 20:46:45 +0000</updated>
        <version>Lustre 2.11.0</version>
        <version>Lustre 2.10.5</version>
        <due></due>
        <votes>0</votes>
        <watches>2</watches>
        <comments>
                            <comment id="220195" author="sarah" created="Wed, 7 Feb 2018 00:29:55 +0000"  >&lt;p&gt;From client-2 console log linked under recovery-mds-scale, found OOM&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_logs/4a7832a4-fded-11e7-bd00-52540065bddc/show_text&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_logs/4a7832a4-fded-11e7-bd00-52540065bddc/show_text&lt;/a&gt;&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[  890.254565] Lustre: DEBUG MARKER: == recovery-double-scale test pairwise_fail: pairwise combination of clients, MDS, and OST failures == 18:42:46 (1516387366)
[  890.406126] Lustre: DEBUG MARKER: PATH=/opt/iozone/bin:/opt/iozone/bin:/usr/lib64/lustre/tests/mpi:/usr/lib64/lustre/tests/racer:/usr/lib64/lustre/../lustre-iokit/sgpdd-survey:/usr/lib64/lustre/tests:/usr/lib64/lustre/utils/gss:/usr/lib64/lustre/utils:/usr/lib64/qt-3.3/bin:/usr/lib64/comp
[  890.451121] Lustre: DEBUG MARKER: /usr/sbin/lctl mark Started client load: dd on onyx-41vm3
[  890.610069] Lustre: DEBUG MARKER: Started client load: dd on onyx-41vm3
[  890.815339] Lustre: DEBUG MARKER: /usr/sbin/lctl mark Started client load: tar on onyx-41vm4
[  890.972956] Lustre: DEBUG MARKER: Started client load: tar on onyx-41vm4
[  893.361427] Lustre: DEBUG MARKER: cat /tmp/client-load.pid
[ 1204.465816] Lustre: DEBUG MARKER: /usr/sbin/lctl mark 
[ 1204.465816] ==== START === test 1: failover MDS, then OST ==========
[ 1210.830668] Lustre: DEBUG MARKER: Fri Jan 19 18:48:05 2018
[ 1210.830668] 
[ 1213.997192] Lustre: DEBUG MARKER: /usr/sbin/lctl mark ==== Checking the clients loads BEFORE failover -- failure NOT OK
[ 1216.173546] Lustre: DEBUG MARKER: ==== Checking the clients loads BEFORE failover -- failure NOT OK
[ 1222.239838] Lustre: DEBUG MARKER: rc=0;
[ 1222.239838] 			val=$(/usr/sbin/lctl get_param -n catastrophe 2&amp;gt;&amp;amp;1);
[ 1222.239838] 			if [[ $? -eq 0 &amp;amp;&amp;amp; $val -ne 0 ]]; then
[ 1222.239838] 				echo $(hostname -s): $val;
[ 1222.239838] 				rc=$val;
[ 1222.239838] 			fi;
[ 1222.239838] 			exit $rc
[ 1222.744739] Lustre: DEBUG MARKER: ps auxwww | grep -v grep | grep -q run_dd.sh
[ 1223.902089] Lustre: DEBUG MARKER: /usr/sbin/lctl mark Done checking client loads. Failing type1=MDS item1=mds1 ... 
[ 1224.084788] Lustre: DEBUG MARKER: Done checking client loads. Failing type1=MDS item1=mds1 ...
[ 1270.724016] Lustre: 7292:0:(client.c:2100:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1516387738/real 1516387738]  req@ffff88001eef7600 x1590047326596256/t0(0) o35-&amp;gt;lustre-MDT0000-mdc-ffff88007ba7e800@10.2.8.138@tcp:23/10 lens 512/936 e 0 to 1 dl 1516387745 ref 2 fl Rpc:X/0/ffffffff rc 0/-1
[ 1270.726937] Lustre: lustre-MDT0000-mdc-ffff88007ba7e800: Connection to lustre-MDT0000 (at 10.2.8.138@tcp) was lost; in progress operations using this service will wait for recovery to complete
[ 1287.728010] Lustre: 3108:0:(client.c:2100:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1516387718/real 1516387737]  req@ffff88001eef4f00 x1590047326531120/t0(0) o400-&amp;gt;MGC10.2.8.138@tcp@10.2.8.138@tcp:26/25 lens 224/224 e 0 to 1 dl 1516387762 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
[ 1287.730909] LustreError: 166-1: MGC10.2.8.138@tcp: Connection to MGS (at 10.2.8.138@tcp) was lost; in progress operations using this service will fail
[ 1292.732016] Lustre: 3109:0:(client.c:2100:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1516387723/real 1516387737]  req@ffff88001eef7000 x1590047326560128/t0(0) o400-&amp;gt;lustre-MDT0000-mdc-ffff88007ba7e800@10.2.8.138@tcp:12/10 lens 224/224 e 0 to 1 dl 1516387767 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
[ 1292.735020] Lustre: 3109:0:(client.c:2100:ptlrpc_expire_one_request()) Skipped 1 previous similar message
[ 1297.733277] Lustre: Evicted from MGS (at MGC10.2.8.138@tcp_1) after server handle changed from 0x574864cbfa148717 to 0x25e1f99c947a4e92
[ 1297.734602] Lustre: MGC10.2.8.138@tcp: Connection restored to MGC10.2.8.138@tcp_1 (at 10.2.8.139@tcp)
[ 1297.735509] Lustre: 3109:0:(client.c:2100:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1516387728/real 1516387737]  req@ffff88000f130600 x1590047326574624/t0(0) o400-&amp;gt;lustre-MDT0000-mdc-ffff88007ba7e800@10.2.8.139@tcp:12/10 lens 224/224 e 0 to 1 dl 1516387772 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
[ 1302.738007] Lustre: 3109:0:(client.c:2100:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1516387733/real 1516387737]  req@ffff88000f133000 x1590047326593552/t0(0) o400-&amp;gt;lustre-MDT0000-mdc-ffff88007ba7e800@10.2.8.139@tcp:12/10 lens 224/224 e 0 to 1 dl 1516387777 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
[ 1313.679569] Lustre: DEBUG MARKER: /usr/sbin/lctl mark onyx-41vm8.onyx.hpdd.intel.com: executing set_default_debug vfstrace rpctrace dlmtrace neterror ha config ioctl super lfsck all 4
[ 1313.732006] Lustre: 3109:0:(client.c:2100:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1516387743/real 1516387743]  req@ffff88000f131200 x1590047326596400/t0(0) o400-&amp;gt;lustre-MDT0000-mdc-ffff88007ba7e800@10.2.8.139@tcp:12/10 lens 224/224 e 0 to 1 dl 1516387787 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
[ 1313.734755] Lustre: 3109:0:(client.c:2100:ptlrpc_expire_one_request()) Skipped 1 previous similar message
[ 1316.920391] Lustre: lustre-MDT0000-mdc-ffff88007ba7e800: Connection restored to 10.2.8.139@tcp (at 10.2.8.139@tcp)
[ 1316.926808] Lustre: DEBUG MARKER: onyx-41vm8.onyx.hpdd.intel.com: executing set_default_debug vfstrace rpctrace dlmtrace neterror ha config ioctl super lfsck all 4
[ 1318.510417] Lustre: DEBUG MARKER: /usr/sbin/lctl mark                             Failing type2=OST item2=ost1 ... 
[ 1318.940320] Lustre: DEBUG MARKER: Failing type2=OST item2=ost1 ...
[ 1375.152025] LustreError: 3103:0:(events.c:199:client_bulk_callback()) event type 1, status -5, desc ffff88005fce2600
[ 1375.153134] LustreError: 3103:0:(events.c:199:client_bulk_callback()) event type 1, status -5, desc ffff88005fce2600
[ 1375.154243] LustreError: 3100:0:(events.c:199:client_bulk_callback()) event type 1, status -111, desc ffff88005fce2e00
[ 1375.155257] LustreError: 3103:0:(events.c:199:client_bulk_callback()) event type 1, status -5, desc ffff88005fce2e00
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="220293" author="sarah" created="Wed, 7 Feb 2018 01:06:11 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 2029.017611] Kernel panic - not syncing: Out of memory: system-wide panic_on_oom is enabled
[ 2029.017611] 
[ 2029.018521] CPU: 1 PID: 476 Comm: sssd Tainted: G           OE  ------------   3.10.0-693.11.6.el7.x86_64 #1
[ 2029.019426] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 2029.019970] Call Trace:
[ 2029.020219]  [&amp;lt;ffffffff816a5ea1&amp;gt;] dump_stack+0x19/0x1b
[ 2029.020714]  [&amp;lt;ffffffff8169fd64&amp;gt;] panic+0xe8/0x20d
[ 2029.021171]  [&amp;lt;ffffffff81188265&amp;gt;] check_panic_on_oom+0x55/0x60
[ 2029.021728]  [&amp;lt;ffffffff8118865b&amp;gt;] out_of_memory+0x23b/0x4f0
[ 2029.022256]  [&amp;lt;ffffffff816a1d9a&amp;gt;] __alloc_pages_slowpath+0x5d6/0x724
[ 2029.022861]  [&amp;lt;ffffffff8118eaa5&amp;gt;] __alloc_pages_nodemask+0x405/0x420
[ 2029.023476]  [&amp;lt;ffffffff811d6075&amp;gt;] alloc_pages_vma+0xb5/0x200
[ 2029.024010]  [&amp;lt;ffffffff811c63fd&amp;gt;] read_swap_cache_async+0xed/0x160
[ 2029.024597]  [&amp;lt;ffffffff811c6518&amp;gt;] swapin_readahead+0xa8/0x110
[ 2029.025138]  [&amp;lt;ffffffff811b421b&amp;gt;] handle_mm_fault+0xadb/0xfa0
[ 2029.025685]  [&amp;lt;ffffffff810d025e&amp;gt;] ? dequeue_task_fair+0x41e/0x660
[ 2029.026254]  [&amp;lt;ffffffff810c93f5&amp;gt;] ? sched_clock_cpu+0x85/0xc0
[ 2029.026805]  [&amp;lt;ffffffff816b37e4&amp;gt;] __do_page_fault+0x154/0x450
[ 2029.027344]  [&amp;lt;ffffffff816b3bc6&amp;gt;] trace_do_page_fault+0x56/0x150
[ 2029.027915]  [&amp;lt;ffffffff816b325a&amp;gt;] do_async_page_fault+0x1a/0xd0
[ 2029.028479]  [&amp;lt;ffffffff816af928&amp;gt;] async_page_fault+0x28/0x30
[ 2029.029011]  [&amp;lt;ffffffff81219034&amp;gt;] ? do_sys_poll+0x184/0x580
[ 2029.029544]  [&amp;lt;ffffffff81218ff5&amp;gt;] ? do_sys_poll+0x145/0x580
[ 2029.030070]  [&amp;lt;ffffffff816a9700&amp;gt;] ? bit_wait+0x50/0x50
[ 2029.030565]  [&amp;lt;ffffffff816ab6d9&amp;gt;] ? schedule+0x29/0x70
[ 2029.031053]  [&amp;lt;ffffffff81062efe&amp;gt;] ? kvm_clock_get_cycles+0x1e/0x20
[ 2029.031639]  [&amp;lt;ffffffff81217d10&amp;gt;] ? poll_select_copy_remaining+0x150/0x150
[ 2029.032281]  [&amp;lt;ffffffff81217d10&amp;gt;] ? poll_select_copy_remaining+0x150/0x150
[ 2029.032935]  [&amp;lt;ffffffff81217d10&amp;gt;] ? poll_select_copy_remaining+0x150/0x150
[ 2029.033584]  [&amp;lt;ffffffff81217d10&amp;gt;] ? poll_select_copy_remaining+0x150/0x150
[ 2029.034226]  [&amp;lt;ffffffff81217d10&amp;gt;] ? poll_select_copy_remaining+0x150/0x150
[ 2029.034882]  [&amp;lt;ffffffff81217d10&amp;gt;] ? poll_select_copy_remaining+0x150/0x150
[ 2029.035548]  [&amp;lt;ffffffff81217d10&amp;gt;] ? poll_select_copy_remaining+0x150/0x150
[ 2029.036192]  [&amp;lt;ffffffff81217d10&amp;gt;] ? poll_select_copy_remaining+0x150/0x150
[ 2029.036842]  [&amp;lt;ffffffff81217d10&amp;gt;] ? poll_select_copy_remaining+0x150/0x150
[ 2029.037495]  [&amp;lt;ffffffff81219534&amp;gt;] SyS_poll+0x74/0x110
[ 2029.037973]  [&amp;lt;ffffffff816b89fd&amp;gt;] system_call_fastpath+0x16/0x1b
[ 2029.038543]  [&amp;lt;ffffffff816b889d&amp;gt;] ? system_call_after_swapgs+0xca/0x214
[    0.000000] Initializing cgroup subsys cpuset
[    0.000000] Initializing cgroup subsys cpu
[    0.000000] Initializing cgroup subsys cpuacct
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="231761" author="sarah" created="Thu, 9 Aug 2018 19:44:48 +0000"  >&lt;p&gt;on b2_10 2.10.5 RC1&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/7c3dd016-9882-11e8-b0aa-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/7c3dd016-9882-11e8-b0aa-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
        <attachments>
        </attachments>
        <subtasks>
        </subtasks>
        <customfields>
            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                <customfieldname>Development</customfieldname>
                <customfieldvalues>
                </customfieldvalues>
            </customfield>
            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                <customfieldname>Rank</customfieldname>
                <customfieldvalues>
                    <customfieldvalue>1|hzzscf:</customfieldvalue>
                </customfieldvalues>
            </customfield>
            <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                <customfieldname>Rank (Obsolete)</customfieldname>
                <customfieldvalues>
                    <customfieldvalue>9223372036854775807</customfieldvalue>
                </customfieldvalues>
            </customfield>
            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                <customfieldname>Severity</customfieldname>
                <customfieldvalues>
                    <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>
                </customfieldvalues>
            </customfield>
        </customfields>
    </item>
</channel>
</rss>