<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:55:52 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
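For instance, a hypothetical request (URL pattern assumed from JIRA's standard issue XML view; not part of this export) would be:
    https://jira.whamcloud.com/si/jira.issueviews:issue-xml/LU-5944/LU-5944.xml?field=key&field=summary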
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5944] Failover recovery-mds-scale test_failover_mds: client OOM</title>
                <link>https://jira.whamcloud.com/browse/LU-5944</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for sarah &amp;lt;sarah@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/ba0b8798-6902-11e4-9d25-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/ba0b8798-6902-11e4-9d25-5254006e85c2&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_failover_mds failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;test_failover_mds returned 4
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;p&gt;client console&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;03:26:42:Lustre: DEBUG MARKER: ==== Checking the clients loads AFTER failover -- failure NOT OK
03:26:42:Lustre: DEBUG MARKER: rc=$(lctl get_param -n catastrophe);
03:26:42:		if [ $rc -ne 0 ]; then echo $(hostname): $rc; fi
03:26:42:		exit $rc
03:26:42:Lustre: DEBUG MARKER: ps auxwww | grep -v grep | grep -q run_dd.sh
03:26:42:Lustre: DEBUG MARKER: /usr/sbin/lctl mark mds1 has failed over 4 times, and counting...
03:26:42:Lustre: DEBUG MARKER: mds1 has failed over 4 times, and counting...
03:26:42:Lustre: 2188:0:(client.c:1947:ptlrpc_expire_one_request()) @@@ Request sent has timed out for sent delay: [sent 1415445951/real 1415445967]  req@ffff880032a59800 x1484198832385960/t0(0) o400-&amp;gt;lustre-MDT0000-mdc-ffff88007a10bc00@10.2.4.189@tcp:12/10 lens 224/224 e 0 to 1 dl 1415445958 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:26:42:Lustre: 2188:0:(client.c:1947:ptlrpc_expire_one_request()) Skipped 1 previous similar message
03:31:34:Lustre: Evicted from MGS (at 10.2.4.185@tcp) after server handle changed from 0xd9f20fa7646c5b6b to 0x11b90fceb8d34e70
03:31:34:Lustre: MGC10.2.4.185@tcp: Connection restored to MGS (at 10.2.4.185@tcp)
03:31:34:LustreError: 2187:0:(client.c:2817:ptlrpc_replay_interpret()) @@@ status 301, old was 0  req@ffff88005bc46c00 x1484198830088128/t17179982789(17179982789) o101-&amp;gt;lustre-MDT0000-mdc-ffff88007a10bc00@10.2.4.185@tcp:12/10 lens 704/544 e 0 to 0 dl 1415446038 ref 2 fl Interpret:RP/4/0 rc 301/301
03:31:34:Lustre: lustre-MDT0000-mdc-ffff88007a10bc00: Connection restored to lustre-MDT0000 (at 10.2.4.185@tcp)
03:31:34:ntpd invoked oom-killer: gfp_mask=0x200da, order=0, oom_adj=0, oom_score_adj=0
03:31:34:ntpd cpuset=/ mems_allowed=0
03:31:34:Pid: 1849, comm: ntpd Not tainted 2.6.32-431.29.2.el6.x86_64 #1
03:31:34:Call Trace:
03:31:34: [&amp;lt;ffffffff810d0361&amp;gt;] ? cpuset_print_task_mems_allowed+0x91/0xb0
03:31:34: [&amp;lt;ffffffff81122730&amp;gt;] ? dump_header+0x90/0x1b0
03:31:34: [&amp;lt;ffffffff8112289e&amp;gt;] ? check_panic_on_oom+0x4e/0x80
03:31:34: [&amp;lt;ffffffff81122f8b&amp;gt;] ? out_of_memory+0x1bb/0x3c0
03:31:34: [&amp;lt;ffffffff8112f90f&amp;gt;] ? __alloc_pages_nodemask+0x89f/0x8d0
03:31:34: [&amp;lt;ffffffff8116799a&amp;gt;] ? alloc_pages_vma+0x9a/0x150
03:31:34: [&amp;lt;ffffffff8115b622&amp;gt;] ? read_swap_cache_async+0xf2/0x160
03:31:34: [&amp;lt;ffffffff8115c149&amp;gt;] ? valid_swaphandles+0x69/0x150
03:31:34: [&amp;lt;ffffffff8115b717&amp;gt;] ? swapin_readahead+0x87/0xc0
03:31:34: [&amp;lt;ffffffff8114a9bd&amp;gt;] ? handle_pte_fault+0x6dd/0xb00
03:31:34: [&amp;lt;ffffffff81060aa3&amp;gt;] ? perf_event_task_sched_out+0x33/0x70
03:31:34: [&amp;lt;ffffffff8114b00a&amp;gt;] ? handle_mm_fault+0x22a/0x300
03:31:34: [&amp;lt;ffffffff8104a8d8&amp;gt;] ? __do_page_fault+0x138/0x480
03:31:34: [&amp;lt;ffffffff811a0870&amp;gt;] ? pollwake+0x0/0x60
03:31:34: [&amp;lt;ffffffff811a0870&amp;gt;] ? pollwake+0x0/0x60
03:31:34: [&amp;lt;ffffffff811a0870&amp;gt;] ? pollwake+0x0/0x60
03:31:34: [&amp;lt;ffffffff8152e99e&amp;gt;] ? do_page_fault+0x3e/0xa0
03:31:34: [&amp;lt;ffffffff8152bd55&amp;gt;] ? page_fault+0x25/0x30
03:31:34: [&amp;lt;ffffffff8128dea6&amp;gt;] ? copy_user_generic_unrolled+0x86/0xb0
03:31:34: [&amp;lt;ffffffff810129de&amp;gt;] ? copy_user_generic+0xe/0x20
03:31:34: [&amp;lt;ffffffff811a0589&amp;gt;] ? set_fd_set+0x49/0x60
03:31:34: [&amp;lt;ffffffff811a1a4c&amp;gt;] ? core_sys_select+0x1bc/0x2c0
03:31:34: [&amp;lt;ffffffff8103f9d8&amp;gt;] ? pvclock_clocksource_read+0x58/0xd0
03:31:34: [&amp;lt;ffffffff8103f9d8&amp;gt;] ? pvclock_clocksource_read+0x58/0xd0
03:31:34: [&amp;lt;ffffffff8103ea6c&amp;gt;] ? kvm_clock_read+0x1c/0x20
03:31:34: [&amp;lt;ffffffff8103ea79&amp;gt;] ? kvm_clock_get_cycles+0x9/0x10
03:31:34: [&amp;lt;ffffffff810a6d31&amp;gt;] ? ktime_get_ts+0xb1/0xf0
03:31:34: [&amp;lt;ffffffff811a1da7&amp;gt;] ? sys_select+0x47/0x110
03:31:34: [&amp;lt;ffffffff8100b072&amp;gt;] ? system_call_fastpath+0x16/0x1b
03:31:34:Mem-Info:
03:31:34:Node 0 DMA per-cpu:
03:31:34:CPU    0: hi:    0, btch:   1 usd:   0
03:31:34:CPU    1: hi:    0, btch:   1 usd:   0
03:31:34:Node 0 DMA32 per-cpu:
03:31:34:CPU    0: hi:  186, btch:  31 usd:  91
03:31:34:CPU    1: hi:  186, btch:  31 usd:  41
03:31:34:active_anon:17 inactive_anon:0 isolated_anon:0
03:31:34: active_file:203782 inactive_file:205446 isolated_file:0
03:31:34: unevictable:0 dirty:440 writeback:30040 unstable:0
03:31:34: free:12464 slab_reclaimable:2779 slab_unreclaimable:42062
03:31:34: mapped:1935 shmem:0 pagetables:1109 bounce:0
03:31:34:Node 0 DMA free:8352kB min:332kB low:412kB high:496kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:6552kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15348kB mlocked:0kB dirty:0kB writeback:6552kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:840kB kernel_stack:0kB pagetables:0kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:11072 all_unreclaimable? yes
03:31:34:lowmem_reserve[]: 0 2004 2004 2004
03:31:34:Node 0 DMA32 free:41504kB min:44720kB low:55900kB high:67080kB active_anon:68kB inactive_anon:0kB active_file:815128kB inactive_file:815232kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:2052308kB mlocked:0kB dirty:1760kB writeback:113608kB mapped:7740kB shmem:0kB slab_reclaimable:11116kB slab_unreclaimable:167408kB kernel_stack:1464kB pagetables:4436kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:2961178 all_unreclaimable? yes
03:31:34:lowmem_reserve[]: 0 0 0 0
03:31:34:Node 0 DMA: 8*4kB 60*8kB 40*16kB 25*32kB 12*64kB 0*128kB 0*256kB 1*512kB 1*1024kB 2*2048kB 0*4096kB = 8352kB
03:31:34:Node 0 DMA32: 322*4kB 77*8kB 51*16kB 34*32kB 15*64kB 5*128kB 5*256kB 4*512kB 4*1024kB 0*2048kB 7*4096kB = 41504kB
03:31:34:227493 total pagecache pages
03:31:34:0 pages in swap cache
03:31:34:Swap cache stats: add 6524, delete 6524, find 2262/2343
03:31:34:Free swap  = 2703532kB
03:31:34:Total swap = 2725884kB
03:31:34:524284 pages RAM
03:31:34:43694 pages reserved
03:31:34:448398 pages shared
03:31:34:234817 pages non-shared
03:31:34:[ pid ]   uid  tgid total_vm      rss cpu oom_adj oom_score_adj name
03:31:34:[  373]     0   373     2721       80   1     -17         -1000 udevd
03:31:34:[ 1019]     0  1019     2280       35   0       0             0 dhclient
03:31:34:[ 1071]     0  1071     6916      132   1     -17         -1000 auditd
03:31:34:[ 1087]     0  1087    63854      236   1       0             0 rsyslogd
03:31:34:[ 1116]     0  1116     2705      103   1       0             0 irqbalance
03:31:34:[ 1130]    32  1130     4744      157   1       0             0 rpcbind
03:31:34:[ 1142]     0  1142    49913      365   1       0             0 sssd
03:31:34:[ 1143]     0  1143    64328      866   1       0             0 sssd_be
03:31:34:[ 1144]     0  1144    50478      506   1       0             0 sssd_nss
03:31:34:[ 1145]     0  1145    48029      495   0       0             0 sssd_pam
03:31:34:[ 1146]     0  1146    47507      503   0       0             0 sssd_ssh
03:31:34:[ 1147]     0  1147    52608      486   0       0             0 sssd_pac
03:31:34:[ 1164]    29  1164     6357      215   1       0             0 rpc.statd
03:31:34:[ 1278]    81  1278     5871      139   0       0             0 dbus-daemon
03:31:34:[ 1316]     0  1316     1020      125   1       0             0 acpid
03:31:34:[ 1325]    68  1325     9921      331   1       0             0 hald
03:31:34:[ 1326]     0  1326     5081      248   1       0             0 hald-runner
03:31:34:[ 1358]     0  1358     5611      238   1       0             0 hald-addon-inpu
03:31:34:[ 1368]    68  1368     4483      236   1       0             0 hald-addon-acpi
03:31:34:[ 1388]     0  1388   168326      554   1       0             0 automount
03:31:34:[ 1434]     0  1434    26827       29   0       0             0 rpc.rquotad
03:31:34:[ 1438]     0  1438     5414       87   0       0             0 rpc.mountd
03:31:34:[ 1474]     0  1474     5773       86   1       0             0 rpc.idmapd
03:31:34:[ 1505]   496  1505    56785      294   1       0             0 munged
03:31:34:[ 1520]     0  1520    16656      100   0     -17         -1000 sshd
03:31:34:[ 1528]     0  1528     5545      179   1       0             0 xinetd
03:31:34:[ 1612]     0  1612    20846      610   1       0             0 master
03:31:34:[ 1620]    89  1620    20866      568   1       0             0 pickup
03:31:34:[ 1622]    89  1622    20909      569   1       0             0 qmgr
03:31:34:[ 1635]     0  1635    29324      153   1       0             0 crond
03:31:34:[ 1646]     0  1646     5385       76   0       0             0 atd
03:31:34:[ 1672]     0  1672    15585      146   0       0             0 certmonger
03:31:34:[ 1686]     0  1686     1020      133   1       0             0 agetty
03:31:34:[ 1687]     0  1687     1016      121   1       0             0 mingetty
03:31:34:[ 1689]     0  1689     1016      121   1       0             0 mingetty
03:31:34:[ 1691]     0  1691     1016      121   1       0             0 mingetty
03:31:34:[ 1693]     0  1693     1016      121   1       0             0 mingetty
03:31:34:[ 1695]     0  1695     1016      121   1       0             0 mingetty
03:31:34:[ 1697]     0  1697     1016      121   1       0             0 mingetty
03:31:34:[ 1701]     0  1701     2720       80   1     -17         -1000 udevd
03:31:34:[ 1702]     0  1702     2720       76   0     -17         -1000 udevd
03:31:34:[ 1849]    38  1849     8205      376   1       0             0 ntpd
03:31:34:[ 3864]     0  3864    15919      354   0       0             0 in.mrshd
03:31:34:[ 3870]     0  3870    26515      292   1       0             0 bash
03:31:34:[ 3892]     0  3892    26515      115   0       0             0 bash
03:31:34:[ 3893]     0  3893    26839      292   0       0             0 run_dd.sh
03:31:34:[ 6142]     0  6142     4346       97   0       0             0 anacron
03:31:34:[ 8144]     0  8144    26295      138   1       0             0 dd
03:31:34:Kernel panic - not syncing: Out of memory: system-wide panic_on_oom is enabled
03:31:34:
03:31:34:Pid: 1849, comm: ntpd Not tainted 2.6.32-431.29.2.el6.x86_64 #1
03:31:34:Call Trace:
03:31:34: [&amp;lt;ffffffff8152873c&amp;gt;] ? panic+0xa7/0x16f
03:31:34: [&amp;lt;ffffffff81122831&amp;gt;] ? dump_header+0x191/0x1b0
03:31:34: [&amp;lt;ffffffff811228cc&amp;gt;] ? check_panic_on_oom+0x7c/0x80
03:31:34: [&amp;lt;ffffffff81122f8b&amp;gt;] ? out_of_memory+0x1bb/0x3c0
03:31:34: [&amp;lt;ffffffff8112f90f&amp;gt;] ? __alloc_pages_nodemask+0x89f/0x8d0
03:31:34: [&amp;lt;ffffffff8116799a&amp;gt;] ? alloc_pages_vma+0x9a/0x150
03:31:34: [&amp;lt;ffffffff8115b622&amp;gt;] ? read_swap_cache_async+0xf2/0x160
03:31:34: [&amp;lt;ffffffff8115c149&amp;gt;] ? valid_swaphandles+0x69/0x150
03:31:34: [&amp;lt;ffffffff8115b717&amp;gt;] ? swapin_readahead+0x87/0xc0
03:31:34: [&amp;lt;ffffffff8114a9bd&amp;gt;] ? handle_pte_fault+0x6dd/0xb00
03:31:34: [&amp;lt;ffffffff81060aa3&amp;gt;] ? perf_event_task_sched_out+0x33/0x70
03:31:34: [&amp;lt;ffffffff8114b00a&amp;gt;] ? handle_mm_fault+0x22a/0x300
03:31:34: [&amp;lt;ffffffff8104a8d8&amp;gt;] ? __do_page_fault+0x138/0x480
03:31:34: [&amp;lt;ffffffff811a0870&amp;gt;] ? pollwake+0x0/0x60
03:31:34: [&amp;lt;ffffffff811a0870&amp;gt;] ? pollwake+0x0/0x60
03:31:34: [&amp;lt;ffffffff811a0870&amp;gt;] ? pollwake+0x0/0x60
03:31:34: [&amp;lt;ffffffff8152e99e&amp;gt;] ? do_page_fault+0x3e/0xa0
03:31:34: [&amp;lt;ffffffff8152bd55&amp;gt;] ? page_fault+0x25/0x30
03:31:34: [&amp;lt;ffffffff8128dea6&amp;gt;] ? copy_user_generic_unrolled+0x86/0xb0
03:31:34: [&amp;lt;ffffffff810129de&amp;gt;] ? copy_user_generic+0xe/0x20
03:31:34: [&amp;lt;ffffffff811a0589&amp;gt;] ? set_fd_set+0x49/0x60
03:31:34: [&amp;lt;ffffffff811a1a4c&amp;gt;] ? core_sys_select+0x1bc/0x2c0
03:31:34: [&amp;lt;ffffffff8103f9d8&amp;gt;] ? pvclock_clocksource_read+0x58/0xd0
03:31:34: [&amp;lt;ffffffff8103f9d8&amp;gt;] ? pvclock_clocksource_read+0x58/0xd0
03:31:34: [&amp;lt;ffffffff8103ea6c&amp;gt;] ? kvm_clock_read+0x1c/0x20
03:31:34: [&amp;lt;ffffffff8103ea79&amp;gt;] ? kvm_clock_get_cycles+0x9/0x10
03:31:34: [&amp;lt;ffffffff810a6d31&amp;gt;] ? ktime_get_ts+0xb1/0xf0
03:31:34: [&amp;lt;ffffffff811a1da7&amp;gt;] ? sys_select+0x47/0x110
03:31:34: [&amp;lt;ffffffff8100b072&amp;gt;] ? system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Info required for matching: recovery-mds-scale failover_mds&lt;/p&gt;</description>
                <environment>lustre-master build # 2733 RHEL6</environment>
        <key id="27683">LU-5944</key>
            <summary>Failover recovery-mds-scale test_failover_mds: client OOM</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Sat, 22 Nov 2014 00:02:53 +0000</created>
                <updated>Tue, 1 Dec 2015 19:38:04 +0000</updated>
                <resolved>Sat, 22 Nov 2014 06:18:53 +0000</resolved>
                <version>Lustre 2.7.0</version>
                <due></due>
                <votes>0</votes>
                <watches>2</watches>
                <comments>
                            <comment id="99851" author="yujian" created="Sat, 22 Nov 2014 06:18:53 +0000"  >&lt;p&gt;This is a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5483&quot; title=&quot;recovery-mds-scale test failover_mds: oom failure on client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5483&quot;&gt;LU-5483&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="26005">LU-5483</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                    <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzx1cf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>16600</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>