<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:12:45 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7883] sanity test_60a invokes oom-killer in subtest 10b and times out</title>
                <link>https://jira.whamcloud.com/browse/LU-7883</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;sanity test_60a invokes the oom-killer and times out in subtest 10b. From a recent failure, the MDS console log shows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;02:35:42:Lustre: 6822:0:(llog_test.c:1468:llog_test_10()) 10a: create a catalog log with name: 851d315f
02:35:42:Lustre: 6822:0:(llog_test.c:1487:llog_test_10()) 10b: write 65536 log records
02:35:42:ntpd invoked oom-killer: gfp_mask=0x201da, order=0, oom_adj=0, oom_score_adj=0
02:35:42:ntpd cpuset=/ mems_allowed=0
02:35:42:Pid: 10244, comm: ntpd Not tainted 2.6.32-573.18.1.el6_lustre.ge5f28dc.x86_64 #1
02:35:42:Call Trace:
02:35:42: [&amp;lt;ffffffff810d71a1&amp;gt;] ? cpuset_print_task_mems_allowed+0x91/0xb0
02:35:42: [&amp;lt;ffffffff8112a9a0&amp;gt;] ? dump_header+0x90/0x1b0
02:35:42: [&amp;lt;ffffffff8112ab0e&amp;gt;] ? check_panic_on_oom+0x4e/0x80
02:35:42: [&amp;lt;ffffffff8112b1fb&amp;gt;] ? out_of_memory+0x1bb/0x3c0
02:35:42: [&amp;lt;ffffffff81137c3c&amp;gt;] ? __alloc_pages_nodemask+0x93c/0x950
02:35:42: [&amp;lt;ffffffff811709ba&amp;gt;] ? alloc_pages_current+0xaa/0x110
02:35:42: [&amp;lt;ffffffff81127d97&amp;gt;] ? __page_cache_alloc+0x87/0x90
02:35:42: [&amp;lt;ffffffff8112777e&amp;gt;] ? find_get_page+0x1e/0xa0
02:35:42: [&amp;lt;ffffffff81128d37&amp;gt;] ? filemap_fault+0x1a7/0x500
02:35:42: [&amp;lt;ffffffff81152314&amp;gt;] ? __do_fault+0x54/0x530
02:35:42: [&amp;lt;ffffffff811a9f30&amp;gt;] ? pollwake+0x0/0x60
02:35:42: [&amp;lt;ffffffff811528e7&amp;gt;] ? handle_pte_fault+0xf7/0xb20
02:35:42: [&amp;lt;ffffffff8153cd75&amp;gt;] ? _read_unlock_bh+0x15/0x20
02:35:42: [&amp;lt;ffffffff8145b227&amp;gt;] ? sock_i_uid+0x47/0x60
02:35:42: [&amp;lt;ffffffff81012bbe&amp;gt;] ? copy_user_generic+0xe/0x20
02:35:42: [&amp;lt;ffffffff811535a9&amp;gt;] ? handle_mm_fault+0x299/0x3d0
02:35:42: [&amp;lt;ffffffff8104f156&amp;gt;] ? __do_page_fault+0x146/0x500
02:35:42: [&amp;lt;ffffffff81042fc9&amp;gt;] ? kvm_clock_get_cycles+0x9/0x10
02:35:42: [&amp;lt;ffffffff810ad40f&amp;gt;] ? ktime_get_ts+0xbf/0x100
02:35:42: [&amp;lt;ffffffff811a9b78&amp;gt;] ? poll_select_copy_remaining+0xf8/0x150
02:35:42: [&amp;lt;ffffffff8153fd1e&amp;gt;] ? do_page_fault+0x3e/0xa0
02:35:42: [&amp;lt;ffffffff8153d0c5&amp;gt;] ? page_fault+0x25/0x30
02:35:42:Mem-Info:
02:35:42:Node 0 DMA per-cpu:
02:35:42:CPU    0: hi:    0, btch:   1 usd:   0
02:35:42:CPU    1: hi:    0, btch:   1 usd:   0
02:35:42:Node 0 DMA32 per-cpu:
02:35:42:CPU    0: hi:  186, btch:  31 usd:  75
02:35:42:CPU    1: hi:  186, btch:  31 usd:   0
02:35:42:active_anon:3065 inactive_anon:3126 isolated_anon:0
02:35:42: active_file:1149 inactive_file:1114 isolated_file:0
02:35:42: unevictable:0 dirty:0 writeback:3125 unstable:0
02:35:42: free:13241 slab_reclaimable:2692 slab_unreclaimable:420617
02:35:42: mapped:1 shmem:3 pagetables:1219 bounce:0
02:35:42:Node 0 DMA free:8356kB min:332kB low:412kB high:496kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:4kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15348kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:4kB slab_unreclaimable:7360kB kernel_stack:0kB pagetables:0kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:1 all_unreclaimable? yes
02:35:42:lowmem_reserve[]: 0 2004 2004 2004
02:35:42:Node 0 DMA32 free:44608kB min:44720kB low:55900kB high:67080kB active_anon:12260kB inactive_anon:12504kB active_file:4596kB inactive_file:4452kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:2052308kB mlocked:0kB dirty:0kB writeback:3324kB mapped:4kB shmem:12kB slab_reclaimable:10764kB slab_unreclaimable:1674856kB kernel_stack:4592kB pagetables:4876kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:279133 all_unreclaimable? yes
02:35:42:lowmem_reserve[]: 0 0 0 0
02:35:42:Node 0 DMA: 3*4kB 1*8kB 3*16kB 3*32kB 2*64kB 3*128kB 2*256kB 2*512kB 2*1024kB 2*2048kB 0*4096kB = 8356kB
02:35:42:Node 0 DMA32: 1325*4kB 753*8kB 501*16kB 426*32kB 90*64kB 2*128kB 1*256kB 1*512kB 1*1024kB 0*2048kB 1*4096kB = 44876kB
02:35:42:5204 total pagecache pages
02:35:42:2638 pages in swap cache
02:35:42:Swap cache stats: add 3431, delete 825, find 6/6
02:35:42:Free swap  = 4115064kB
02:35:42:Total swap = 4128764kB
02:35:42:524284 pages RAM
02:35:42:43737 pages reserved
02:35:42:2420 pages shared
02:35:42:437717 pages non-shared
02:35:42:[ pid ]   uid  tgid total_vm      rss cpu oom_adj oom_score_adj name
02:35:42:[  456]     0   456     2695        4   0     -17         -1000 udevd
02:35:42:[  761]     0   761     2695        9   0     -17         -1000 udevd
02:35:42:[  915]     0   915     2694       12   0     -17         -1000 udevd
02:35:42:[ 1175]     0  1175     2280        1   0       0             0 dhclient
02:35:42:[ 1238]     0  1238     6905       13   1     -17         -1000 auditd
02:35:42:[ 1272]     0  1272    63854      276   0       0             0 rsyslogd
02:35:42:[ 1306]     0  1306     4561        1   0       0             0 irqbalance
02:35:42:[ 1324]    32  1324     4744        1   0       0             0 rpcbind
02:35:42:[ 1340]     0  1340    52816        5   0       0             0 sssd
02:35:42:[ 1341]     0  1341    74242       55   1       0             0 sssd_be
02:35:42:[ 1342]     0  1342    54172        6   1       0             0 sssd_nss
02:35:42:[ 1343]     0  1343    50533        2   1       0             0 sssd_pam
02:35:42:[ 1344]     0  1344    50020       26   0       0             0 sssd_ssh
02:35:42:[ 1345]     0  1345    55112        2   1       0             0 sssd_pac
02:35:42:[ 1367]    29  1367     6357        1   1       0             0 rpc.statd
02:35:42:[ 1401]    81  1401     5878        3   0       0             0 dbus-daemon
02:35:42:[ 1422]     0  1422    47234        1   1       0             0 cupsd
02:35:42:[ 1466]     0  1466     1020        0   0       0             0 acpid
02:35:42:[ 1478]    68  1478    10493        1   0       0             0 hald
02:35:42:[ 1479]     0  1479     5099        1   1       0             0 hald-runner
02:35:42:[ 1511]     0  1511     5629        1   0       0             0 hald-addon-inpu
02:35:42:[ 1523]    68  1523     4501        1   0       0             0 hald-addon-acpi
02:35:42:[ 1547]     0  1547   169290      182   1       0             0 automount
02:35:42:[ 1601]     0  1601    26827        0   0       0             0 rpc.rquotad
02:35:42:[ 1606]     0  1606     5417        0   0       0             0 rpc.mountd
02:35:42:[ 1651]     0  1651     5774        1   0       0             0 rpc.idmapd
02:35:42:[ 1688]   496  1688    56786      138   0       0             0 munged
02:35:42:[ 8272]     0  8272    16556        0   1     -17         -1000 sshd
02:35:42:[ 8283]     0  8283     5429       36   1       0             0 xinetd
02:35:42:[ 8373]     0  8373    20737      227   0       0             0 master
02:35:42:[ 8387]    89  8387    20757      219   0       0             0 pickup
02:35:42:[ 8388]    89  8388    20800      221   0       0             0 qmgr
02:35:42:[ 8402]     0  8402    29215      151   0       0             0 crond
02:35:42:[ 8421]     0  8421     5276       46   1       0             0 atd
02:35:42:[ 8453]     0  8453    16112      187   1       0             0 certmonger
02:35:42:[ 8482]     0  8482     1020       23   1       0             0 agetty
02:35:42:[ 8486]     0  8486     1016       21   1       0             0 mingetty
02:35:42:[ 8491]     0  8491     1016       22   1       0             0 mingetty
02:35:42:[ 8494]     0  8494     1016       22   0       0             0 mingetty
02:35:42:[ 8499]     0  8499     1016       21   1       0             0 mingetty
02:35:42:[ 8504]     0  8504     1016       21   1       0             0 mingetty
02:35:42:[ 8507]     0  8507     1016       21   0       0             0 mingetty
02:35:42:[10244]    38 10244     8207      161   1       0             0 ntpd
02:35:42:[30737]     0 30737     4237       52   1       0             0 anacron
02:35:42:[ 6720]     0  6720    15806      178   0       0             0 in.mrshd
02:35:42:[ 6721]     0  6721    26515       66   0       0             0 bash
02:35:42:[ 6746]     0  6746    26515       66   1       0             0 bash
02:35:42:[ 6747]     0  6747    27593      618   0       0             0 sh
02:35:42:[ 6822]     0  6822    28024       73   0       0             0 lctl
02:35:42:Kernel panic - not syncing: Out of memory: system-wide panic_on_oom is enabled
02:35:42:
02:35:42:Pid: 10244, comm: ntpd Not tainted 2.6.32-573.18.1.el6_lustre.ge5f28dc.x86_64 #1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Here are recent logs for this type of failure:&lt;br/&gt;
2016-03-10 &lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/2e6d4b9a-e700-11e5-bdde-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/2e6d4b9a-e700-11e5-bdde-5254006e85c2&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/6c42ab98-e6d0-11e5-8590-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/6c42ab98-e6d0-11e5-8590-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;2016-03-14 &lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/8f4538da-ea7d-11e5-8779-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/8f4538da-ea7d-11e5-8779-5254006e85c2&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/bc22e04c-ea4f-11e5-8606-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/bc22e04c-ea4f-11e5-8606-5254006e85c2&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/7dc47e5e-ea41-11e5-8606-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/7dc47e5e-ea41-11e5-8606-5254006e85c2&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/a332f46a-e9fe-11e5-8186-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/a332f46a-e9fe-11e5-8186-5254006e85c2&lt;/a&gt;&lt;/p&gt;</description>
                <environment>review-dne-part-1</environment>
        <key id="35377">LU-7883</key>
            <summary>sanity test_60a invokes oom-killer in subtest 10b and times out</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="ys">Yang Sheng</assignee>
                                    <reporter username="jamesanunez">James Nunez</reporter>
                        <labels>
                    </labels>
                <created>Tue, 15 Mar 2016 22:51:30 +0000</created>
                <updated>Tue, 24 Oct 2017 22:20:48 +0000</updated>
                            <resolved>Mon, 11 Apr 2016 13:59:27 +0000</resolved>
                                    <version>Lustre 2.9.0</version>
                                    <fixVersion>Lustre 2.9.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>10</watches>
                                                                            <comments>
                            <comment id="145729" author="bfaccini" created="Wed, 16 Mar 2016 08:50:33 +0000"  >&lt;p&gt;Could it be that the fix for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7329&quot; title=&quot;sanity test_60a timeouts with &#8220;* invoking oom-killer&#8221; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7329&quot;&gt;&lt;del&gt;LU-7329&lt;/del&gt;&lt;/a&gt; (to sync LLOG device frequently in order to force flushing of huge number of journal commit callbacks, that have been generated due to huge and artificial LLOG activity in a very short period of time, and thus avoid OOM situations on relatively small memory-sized VMs of auto-test) needs to be expanded to previous sub_tests, also creating/deleting bunches of LLOG records, in llog_test() ??&lt;br/&gt;
This was already suspected due to Oleg&apos;s comment in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7329&quot; title=&quot;sanity test_60a timeouts with &#8220;* invoking oom-killer&#8221; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7329&quot;&gt;&lt;del&gt;LU-7329&lt;/del&gt;&lt;/a&gt; arguing he already encountered OOM situations before llog_test_10() stage ...&lt;br/&gt;
Also, there may be some new VM configuration causing even less memory to be available and thus cause to trigger OOM quicker ?&lt;/p&gt;</comment>
                            <comment id="145831" author="jgmitter" created="Wed, 16 Mar 2016 18:27:34 +0000"  >&lt;p&gt;Hi Yang Sheng,&lt;br/&gt;
Can you please have a look at this one?&lt;br/&gt;
Thanks.&lt;br/&gt;
Joe&lt;/p&gt;</comment>
                            <comment id="145900" author="ys" created="Thu, 17 Mar 2016 07:00:42 +0000"  >&lt;p&gt;Got it.&lt;/p&gt;</comment>
                            <comment id="145902" author="yong.fan" created="Thu, 17 Mar 2016 07:17:09 +0000"  >&lt;p&gt;Another failure instance on master:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/2b32bf00-ebce-11e5-aae5-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/2b32bf00-ebce-11e5-aae5-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="145908" author="adilger" created="Thu, 17 Mar 2016 08:26:55 +0000"  >&lt;p&gt;This failed for my patches a couple of times, the most recent was:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/b2e27412-ebde-11e5-93cc-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/b2e27412-ebde-11e5-93cc-5254006e85c2&lt;/a&gt;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;llog_test.c:1487:llog_test_10()) 10b: write 65536 log records
16:15:24:ntpd invoked oom-killer: gfp_mask=0x201da, order=0, oom_adj=0, oom_score_adj=0
... slab_unreclaimable:1675844kB ...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It should be possible to run this test locally with &lt;tt&gt;lctl set_param debug=+malloc&lt;/tt&gt; and see where it is allocating so much memory, even if it isn&apos;t hitting OOM.  The autotest VM has 2GB, which should be plenty to run the sanity tests, but the slab is consuming 1.6GB of that, so the allocation trace should make it pretty clear why 65000 llog records use 1.6GB of RAM.&lt;/p&gt;

</comment>
                            <comment id="145910" author="bfaccini" created="Thu, 17 Mar 2016 08:33:39 +0000"  >&lt;p&gt;Andreas, may be there is something new but if you look to my previous OOM condition analysis in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7329&quot; title=&quot;sanity test_60a timeouts with &#8220;* invoking oom-killer&#8221; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7329&quot;&gt;&lt;del&gt;LU-7329&lt;/del&gt;&lt;/a&gt;, you can see that the Slab consumed space was for journal commit callbacks of previous LLOG operations. &lt;br/&gt;
And before llog_test_10() step, there are some previous sub-tests also dealing with a lot of LLOG records.&lt;br/&gt;
So I wonder if there may be the need to also flush/sync (dt_sync) their associated callbacks, and why not already as a first start step of llog_test_10() itself that I forgot to do in my previous patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7329&quot; title=&quot;sanity test_60a timeouts with &#8220;* invoking oom-killer&#8221; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7329&quot;&gt;&lt;del&gt;LU-7329&lt;/del&gt;&lt;/a&gt;, assuming that the only mass LLOG operations will occur there at this time.&lt;/p&gt;</comment>
                            <comment id="146038" author="bfaccini" created="Thu, 17 Mar 2016 23:38:22 +0000"  >&lt;p&gt;I have analyzed one of the recent crash-dumps for this same issue that has occurred during one of my patch auto-tests, and it shows the same situation that I had already discovered and detailed in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7329&quot; title=&quot;sanity test_60a timeouts with &#8220;* invoking oom-killer&#8221; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7329&quot;&gt;&lt;del&gt;LU-7329&lt;/del&gt;&lt;/a&gt;, memory is full due to huge backlog of journal commit callbacks and their related in-memory stuff.&lt;/p&gt;

&lt;p&gt;So it seems that my earlier suggestion (also flush/sync as a first step of llog_test_10(), and perhaps also before the earlier sub-tests that create/delete bunches of LLOG records) should be able to fix this.&lt;/p&gt;
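&lt;p&gt;A minimal sketch of that idea (illustration only, not the landed patch; the &lt;tt&gt;dt&lt;/tt&gt; handle below is just a stand-in for whatever dt_device backs the llog context used by llog_test_10()):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;/* Illustration only: flush the journal commit callbacks accumulated by
 * the earlier llog sub-tests before 10b writes its 65536 new records,
 * so their bookkeeping does not pile up in unreclaimable slab on a
 * small-memory VM. &quot;dt&quot; is an assumed handle to the backing dt_device. */
rc = dt_sync(env, dt);
if (rc) {
        CERROR(&quot;10_sync: dt_sync failed: rc = %d\n&quot;, rc);
        return rc;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;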

&lt;p&gt;Also, I wonder whether this problem is more frequent in DNE test environments: the crash dump shows 2 MDTs mounted/started on the MDS node, which may put more memory in use.&lt;/p&gt;

&lt;p&gt;And last, even if it cannot be called this problem&apos;s root cause, I really think that VMs with a single processor and 2GB of memory cannot be considered representative platforms for heavy workload simulations or for accurate testing of Lustre&apos;s multi-threaded internal mechanisms.&lt;/p&gt;</comment>
                            <comment id="146040" author="adilger" created="Thu, 17 Mar 2016 23:47:30 +0000"  >&lt;p&gt;Definitely we need to be running the VMs in SMP mode, so at least 2 processors to ensure that there is scheduling between threads. However, I also think that part of the purpose of the VM test environment is to stress Lustre, so having less memory is good. Otherwise, there may be leaks that don&apos;t cause any problems because we don&apos;t run our tests for so long that the VM runs out of memory. &lt;/p&gt;</comment>
                            <comment id="146063" author="ys" created="Fri, 18 Mar 2016 03:56:29 +0000"  >&lt;p&gt;It is really a same issue with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7329&quot; title=&quot;sanity test_60a timeouts with &#8220;* invoking oom-killer&#8221; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7329&quot;&gt;&lt;del&gt;LU-7329&lt;/del&gt;&lt;/a&gt;. The size-512 very high while llog_test running. But i am no trigger OOM in my test box(2G ram &amp;amp; 2 cores). &lt;/p&gt;</comment>
                            <comment id="146078" author="bfaccini" created="Fri, 18 Mar 2016 09:44:21 +0000"  >&lt;p&gt;Andreas, I agree for the low memory VM profile in order to stress Lustre against, but playing so we will expose to this kind of OOM situations when we reach some threshold!&lt;/p&gt;

&lt;p&gt;Yang Sheng, I wonder whether an additional core would allow the journaling/jbd2 threads to handle more efficiently the backlog of commit callbacks whose data consumes all the memory.&lt;/p&gt;

&lt;p&gt;So again, my 2 cents is that simply adding a new dt_sync() at the very beginning of llog_test_10() (as already done inside the sub-test itself by the patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7329&quot; title=&quot;sanity test_60a timeouts with &#8220;* invoking oom-killer&#8221; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7329&quot;&gt;&lt;del&gt;LU-7329&lt;/del&gt;&lt;/a&gt;) will help/fix this by allowing callbacks from the previous sub-tests&apos; LLOG operations to be handled.&lt;/p&gt;</comment>
                            <comment id="146085" author="ys" created="Fri, 18 Mar 2016 13:40:36 +0000"  >&lt;p&gt;Hi, Bruno,&lt;/p&gt;

&lt;p&gt;I cannot reproduce it even with a one-core VM, so I think maybe some other reason causes this problem. Anyway, I&apos;ll make a patch as you suggested and see what happens with it.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
YangSheng&lt;/p&gt;</comment>
                            <comment id="146087" author="gerrit" created="Fri, 18 Mar 2016 13:57:28 +0000"  >&lt;p&gt;Yang Sheng (yang.sheng@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19005&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19005&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7883&quot; title=&quot;sanity test_60a invokes oom-killer in subtest 10b and times out&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7883&quot;&gt;&lt;del&gt;LU-7883&lt;/del&gt;&lt;/a&gt; llog: Flush out journal callback data&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: c2237e6d1b4729a84a547a25f31ee91eb07a8b68&lt;/p&gt;</comment>
                            <comment id="146331" author="rhenwood" created="Mon, 21 Mar 2016 15:46:30 +0000"  >&lt;p&gt;Another failure on Master/review-dne-part-1:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/2733848a-eeb1-11e5-8047-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/2733848a-eeb1-11e5-8047-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="147072" author="gerrit" created="Mon, 28 Mar 2016 18:45:46 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/19005/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19005/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7883&quot; title=&quot;sanity test_60a invokes oom-killer in subtest 10b and times out&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7883&quot;&gt;&lt;del&gt;LU-7883&lt;/del&gt;&lt;/a&gt; llog: Flush out journal callback data&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 97ef9af439181510cc4abce355f1833399aa9445&lt;/p&gt;</comment>
                            <comment id="147763" author="bogl" created="Mon, 4 Apr 2016 20:13:00 +0000"  >&lt;p&gt;another, seen in b2_8:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/1d8daa2a-fa9e-11e5-8951-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/1d8daa2a-fa9e-11e5-8951-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="147765" author="gerrit" created="Mon, 4 Apr 2016 20:19:04 +0000"  >&lt;p&gt;Bob Glossman (bob.glossman@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19321&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19321&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7883&quot; title=&quot;sanity test_60a invokes oom-killer in subtest 10b and times out&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7883&quot;&gt;&lt;del&gt;LU-7883&lt;/del&gt;&lt;/a&gt; llog: Flush out journal callback data&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_8&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 780379da359be080612489d13a01bf48e2007f3c&lt;/p&gt;</comment>
                            <comment id="148383" author="jgmitter" created="Mon, 11 Apr 2016 13:59:27 +0000"  >&lt;p&gt;Landed to master for 2.9.0&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="35620">LU-7923</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="48553">LU-10056</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzy4kf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>