<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:50:11 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5288] RPCs returning -ENOMEM may trigger OOM killer on clients</title>
                <link>https://jira.whamcloud.com/browse/LU-5288</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Here&apos;s a good one. I have a client with plenty of free memory but an OST that is under memory pressure. In this case some RPCs will return -ENOMEM. From the right context this will trick the client kernel into thinking the client is OOM and invoke the OOM killer. Thereby killing syslogd and destroying the evidence of this bug.&lt;/p&gt;

&lt;p&gt;Copied from the console:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 6098.064184] LustreError: 2011:0:(osc_request.c:1643:osc_brw_redo_request()) @@@ redo for recoverable error -12  req@ffff8801eeaf7a88 x1472537312230332/t0(0) o4-&amp;gt;lustre-OST0001-osc-ffff8801fea98e60@192.168.122.107@tcp:6/4 lens 488/416 e 0 to 0 dl 1404327022 ref 2 fl Interpret:RM/0/0 rc -12/-12
[ 6102.783479] 19 invoked oom-killer: gfp_mask=0x0, order=0, oom_adj=0, oom_score_adj=0
[ 6102.786064] 19 cpuset=/ mems_allowed=0
[ 6102.787435] Pid: 21314, comm: 19 Not tainted 2.6.32-431.5.1.el6.lustre.x86_64 #1
[ 6102.789907] Call Trace:
[ 6102.790607]  [&amp;lt;ffffffff815547cb&amp;gt;] ? _spin_unlock+0x2b/0x40
[ 6102.791759]  [&amp;lt;ffffffff81135053&amp;gt;] ? dump_header+0x83/0x210
[ 6102.792913]  [&amp;lt;ffffffff810b777d&amp;gt;] ? trace_hardirqs_on+0xd/0x10
[ 6102.794126]  [&amp;lt;ffffffff81135552&amp;gt;] ? oom_kill_process+0x82/0x2f0
[ 6102.795324]  [&amp;lt;ffffffff81135aa0&amp;gt;] ? out_of_memory+0x2e0/0x400
[ 6102.796506]  [&amp;lt;ffffffff81135c48&amp;gt;] ? pagefault_out_of_memory+0x88/0xf0
[ 6102.797830]  [&amp;lt;ffffffff8104b792&amp;gt;] ? mm_fault_error+0xb2/0x1a0
[ 6102.799027]  [&amp;lt;ffffffff8104bda1&amp;gt;] ? __do_page_fault+0x361/0x4b0
[ 6102.800229]  [&amp;lt;ffffffff810a420f&amp;gt;] ? hrtimer_try_to_cancel+0x3f/0xd0
[ 6102.801506]  [&amp;lt;ffffffff810a42c2&amp;gt;] ? hrtimer_cancel+0x22/0x30
[ 6102.802690]  [&amp;lt;ffffffff81553373&amp;gt;] ? do_nanosleep+0x93/0xc0
[ 6102.803826]  [&amp;lt;ffffffff810a4394&amp;gt;] ? hrtimer_nanosleep+0xc4/0x180
[ 6102.805046]  [&amp;lt;ffffffff810a31a0&amp;gt;] ? hrtimer_wakeup+0x0/0x30
[ 6102.806203]  [&amp;lt;ffffffff815581be&amp;gt;] ? do_page_fault+0x3e/0xa0
[ 6102.807377]  [&amp;lt;ffffffff81555305&amp;gt;] ? page_fault+0x25/0x30
[ 6102.808437] Mem-Info:
[ 6102.808940] Node 0 DMA per-cpu:
[ 6102.809658] CPU    0: hi:    0, btch:   1 usd:   0
[ 6102.810604] CPU    1: hi:    0, btch:   1 usd:   0
[ 6102.811622] CPU    2: hi:    0, btch:   1 usd:   0
[ 6102.812623] CPU    3: hi:    0, btch:   1 usd:   0
[ 6102.813618] CPU    4: hi:    0, btch:   1 usd:   0
[ 6102.814639] CPU    5: hi:    0, btch:   1 usd:   0
[ 6102.815648] CPU    6: hi:    0, btch:   1 usd:   0
[ 6102.816636] CPU    7: hi:    0, btch:   1 usd:   0
[ 6102.817630] Node 0 DMA32 per-cpu:
[ 6102.818371] CPU    0: hi:  186, btch:  31 usd:   0
[ 6102.819377] CPU    1: hi:  186, btch:  31 usd:   0
[ 6102.820362] CPU    2: hi:  186, btch:  31 usd:   0
[ 6102.821358] CPU    3: hi:  186, btch:  31 usd:  29
[ 6102.822380] CPU    4: hi:  186, btch:  31 usd: 183
[ 6102.823409] CPU    5: hi:  186, btch:  31 usd: 185
[ 6102.824399] CPU    6: hi:  186, btch:  31 usd:   0
[ 6102.825436] CPU    7: hi:  186, btch:  31 usd:  29
[ 6102.826450] Node 0 Normal per-cpu:
[ 6102.827215] CPU    0: hi:  186, btch:  31 usd:  42
[ 6102.828215] CPU    1: hi:  186, btch:  31 usd: 111
[ 6102.829214] CPU    2: hi:  186, btch:  31 usd: 119
[ 6102.830214] CPU    3: hi:  186, btch:  31 usd:  85
[ 6102.831220] CPU    4: hi:  186, btch:  31 usd: 123
[ 6102.832180] CPU    5: hi:  186, btch:  31 usd: 101
[ 6102.833195] CPU    6: hi:  186, btch:  31 usd:  92
[ 6102.834199] CPU    7: hi:  186, btch:  31 usd:  62
[ 6102.835218] active_anon:11528 inactive_anon:2 isolated_anon:0
[ 6102.835220]  active_file:30547 inactive_file:67676 isolated_file:0
[ 6102.835222]  unevictable:0 dirty:66 writeback:0 unstable:0
[ 6102.835223]  free:1778602 slab_reclaimable:11524 slab_unreclaimable:52625
[ 6102.835225]  mapped:2670 shmem:97 pagetables:4576 bounce:0
[ 6102.840842] Node 0 DMA free:15712kB min:124kB low:152kB high:184kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15320kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
[ 6102.847936] lowmem_reserve[]: 0 3512 8057 8057
[ 6102.849007] Node 0 DMA32 free:3294500kB min:29404kB low:36752kB high:44104kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:3596500kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
[ 6102.856173] lowmem_reserve[]: 0 0 4545 4545
[ 6102.857095] Node 0 Normal free:3804196kB min:38052kB low:47564kB high:57076kB active_anon:46112kB inactive_anon:8kB active_file:122188kB inactive_file:270704kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:4654080kB mlocked:0kB dirty:264kB writeback:0kB mapped:10680kB shmem:388kB slab_reclaimable:46096kB slab_unreclaimable:210500kB kernel_stack:4008kB pagetables:18304kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
[ 6102.864902] lowmem_reserve[]: 0 0 0 0
[ 6102.865818] Node 0 DMA: 2*4kB 1*8kB 1*16kB 2*32kB 2*64kB 1*128kB 0*256kB 0*512kB 1*1024kB 1*2048kB 3*4096kB = 15712kB
[ 6102.868334] Node 0 DMA32: 17*4kB 6*8kB 7*16kB 8*32kB 9*64kB 14*128kB 6*256kB 6*512kB 10*1024kB 6*2048kB 797*4096kB = 3294500kB
[ 6102.871010] Node 0 Normal: 543*4kB 248*8kB 1599*16kB 1004*32kB 308*64kB 41*128kB 20*256kB 2*512kB 0*1024kB 2*2048kB 905*4096kB = 3803948kB
[ 6102.873830] 98288 total pagecache pages
[ 6102.874639] 0 pages in swap cache
[ 6102.875328] Swap cache stats: add 0, delete 0, find 0/0
[ 6102.876360] Free swap  = 0kB
[ 6102.876926] Total swap = 0kB
[ 6102.911942] 2097151 pages RAM
[ 6102.912569] 89517 pages reserved
[ 6102.913241] 77093 pages shared
[ 6102.913876] 179862 pages non-shared
[ 6102.914612] [ pid ]   uid  tgid total_vm      rss cpu oom_adj oom_score_adj name
[ 6102.916160] [  570]     0   570     2728      242   2     -17         -1000 udevd
[ 6102.917621] [ 1062]     0  1062     2727      248   2     -17         -1000 udevd
[ 6102.919108] [ 1350]     0  1350     6910      200   0     -17         -1000 auditd
[ 6102.920590] [ 1366]     0  1366    62799      888   6       0             0 rsyslogd
[ 6102.922145] [ 1395]     0  1395     2705      148   5       0             0 irqbalance
...
[ 6103.308181] Out of memory: Kill process 1366 (rsyslogd) score 1 or sacrifice child
[ 6103.309573] Killed process 1366, UID 0, (rsyslogd) total-vm:251196kB, anon-rss:2528kB, file-rss:1028kB
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="25412">LU-5288</key>
            <summary>RPCs returning -ENOMEM may trigger OOM killer on clients</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="jhammond">John Hammond</reporter>
                        <labels>
                            <label>malloc</label>
                            <label>osc</label>
                    </labels>
                <created>Wed, 2 Jul 2014 19:39:50 +0000</created>
                <updated>Mon, 31 Jan 2022 16:07:00 +0000</updated>
                            <resolved>Mon, 31 Jan 2022 16:07:00 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="324483" author="adilger" created="Sun, 30 Jan 2022 10:51:03 +0000"  >&lt;p&gt;Patrick, could this potentially be the cause of some OOM issues we are seeing?&lt;/p&gt;</comment>
                            <comment id="324592" author="paf0186" created="Mon, 31 Jan 2022 16:05:53 +0000"  >&lt;p&gt;We cleaned things up a while back, which I think is why we haven&apos;t seen this in a few years - ENOMEM was coming up in some weird situations where it shouldn&apos;t be in the error paths, I believe.&lt;/p&gt;

&lt;p&gt;So, I don&apos;t &lt;b&gt;think&lt;/b&gt; so?&#160; And I haven&apos;t seen any particular evidence of this specific issue.&#160; I&apos;ll bear it in mind but I think I&apos;m going to close it.&lt;/p&gt;</comment>
                            <comment id="324593" author="paf0186" created="Mon, 31 Jan 2022 16:06:25 +0000"  >&lt;p&gt;FWIW the &lt;b&gt;testing&lt;/b&gt; ENOMEM seems to have been all me leaving that debug_mb=10000 in place.&lt;/p&gt;</comment>
                            <comment id="324594" author="paf0186" created="Mon, 31 Jan 2022 16:07:00 +0000"  >&lt;p&gt;Hasn&apos;t been seen in some years - I suspect this went away with a rework of the LDLM error codes, as we were at one point getting spurious ENOMEMs.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwqh3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14754</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>