<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:34:41 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10393] OSS - OSS OOM during recovery</title>
                <link>https://jira.whamcloud.com/browse/LU-10393</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;soak-7 (OSS) OOM  after it stuck in orphan delete&lt;/p&gt;

&lt;p&gt;full coredump is on spirit: &lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;root@soak-16 10.10.1.107-2017-12-14-12:40:00&amp;#93;&lt;/span&gt;# pwd&lt;br/&gt;
/scratch/dumps/soak-7.spirit.hpdd.intel.com/10.10.1.107-2017-12-14-12:40:00&lt;/p&gt;


&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[44337.534689] LustreError: dumping log to /tmp/lustre-log.1513254458.29742
[44337.745264] LNet: Service thread pid 9119 was inactive for 1205.40s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[44337.745265] Pid: 9119, comm: ll_ost01_014
[44337.745265]
Call Trace:
[44337.745267]  [&amp;lt;ffffffff816a9569&amp;gt;] schedule+0x29/0x70
[44337.745280]  [&amp;lt;ffffffffc07974d5&amp;gt;] cv_wait_common+0x125/0x150 [spl]
[44337.745281]  [&amp;lt;ffffffff810b1920&amp;gt;] ? autoremove_wake_function+0x0/0x40
[44337.745285]  [&amp;lt;ffffffffc0797515&amp;gt;] __cv_wait+0x15/0x20 [spl]
[44337.745327]  [&amp;lt;ffffffffc1fd8a33&amp;gt;] txg_wait_open+0xc3/0x110 [zfs]
[44337.745344]  [&amp;lt;ffffffffc1f8dd9a&amp;gt;] dmu_tx_wait+0x3aa/0x3c0 [zfs]
[44337.745359]  [&amp;lt;ffffffffc1f8de41&amp;gt;] dmu_tx_assign+0x91/0x490 [zfs]
[44337.745365]  [&amp;lt;ffffffffc0fbdf3a&amp;gt;] osd_trans_start+0xaa/0x3c0 [osd_zfs]
[44337.745373]  [&amp;lt;ffffffffc10eb198&amp;gt;] ofd_destroy+0x248/0x940 [ofd]
[44337.745377]  [&amp;lt;ffffffffc10e3b43&amp;gt;] ofd_destroy_by_fid+0x1e3/0x4a0 [ofd]
[44337.745402]  [&amp;lt;ffffffffc0dbf200&amp;gt;] ? ldlm_blocking_ast+0x0/0x170 [ptlrpc]
[44337.745423]  [&amp;lt;ffffffffc0db97a0&amp;gt;] ? ldlm_completion_ast+0x0/0x920 [ptlrpc]
[44337.745427]  [&amp;lt;ffffffffc10d84cd&amp;gt;] ofd_create_hdl+0x1abd/0x2090 [ofd]
[44337.745451]  [&amp;lt;ffffffffc0deee37&amp;gt;] ? lustre_msg_add_version+0x27/0xa0 [ptlrpc]
[44337.745480]  [&amp;lt;ffffffffc0e520d5&amp;gt;] tgt_request_handle+0x925/0x1370 [ptlrpc]
[44337.745504]  [&amp;lt;ffffffffc0dfaf16&amp;gt;] ptlrpc_server_handle_request+0x236/0xa90 [ptlrpc]
[44337.745528]  [&amp;lt;ffffffffc0df7548&amp;gt;] ? ptlrpc_wait_event+0x98/0x340 [ptlrpc]
[44337.745554]  [&amp;lt;ffffffffc0c14be1&amp;gt;] ? keys_fini+0xb1/0x1d0 [obdclass]
[44337.745578]  [&amp;lt;ffffffffc0df98d1&amp;gt;] ? ptlrpc_server_drop_request+0x431/0x6c0 [ptlrpc]
[44337.745602]  [&amp;lt;ffffffffc0dfe652&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
[44337.745625]  [&amp;lt;ffffffffc0dfdbc0&amp;gt;] ? ptlrpc_main+0x0/0x1e40 [ptlrpc]
[44337.745626]  [&amp;lt;ffffffff810b099f&amp;gt;] kthread+0xcf/0xe0
[44337.745627]  [&amp;lt;ffffffff810b08d0&amp;gt;] ? kthread+0x0/0xe0
[44337.745629]  [&amp;lt;ffffffff816b4fd8&amp;gt;] ret_from_fork+0x58/0x90
[44337.745630]  [&amp;lt;ffffffff810b08d0&amp;gt;] ? kthread+0x0/0xe0
[44337.745630] 
[44339.829743] LNet: Service thread pid 103074 was inactive for 1201.15s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[44339.829744] Pid: 103074, comm: ll_ost01_048
[44339.829744]
Call Trace:
[44339.829748]  [&amp;lt;ffffffff816aa489&amp;gt;] schedule_preempt_disabled+0x29/0x70
[44339.829750]  [&amp;lt;ffffffff816a83b7&amp;gt;] __mutex_lock_slowpath+0xc7/0x1d0
[44339.829752]  [&amp;lt;ffffffff816a77cf&amp;gt;] mutex_lock+0x1f/0x2f
[44339.829759]  [&amp;lt;ffffffffc10d77db&amp;gt;] ofd_create_hdl+0xdcb/0x2090 [ofd]
[44339.829793]  [&amp;lt;ffffffffc0deee37&amp;gt;] ? lustre_msg_add_version+0x27/0xa0 [ptlrpc]
[44339.829818]  [&amp;lt;ffffffffc0def18f&amp;gt;] ? lustre_pack_reply_v2+0x14f/0x280 [ptlrpc]
[44339.829842]  [&amp;lt;ffffffffc0def4b1&amp;gt;] ? lustre_pack_reply+0x11/0x20 [ptlrpc]
[44339.829872]  [&amp;lt;ffffffffc0e520d5&amp;gt;] tgt_request_handle+0x925/0x1370 [ptlrpc]
[44339.829897]  [&amp;lt;ffffffffc0dfaf16&amp;gt;] ptlrpc_server_handle_request+0x236/0xa90 [ptlrpc]
[44339.829921]  [&amp;lt;ffffffffc0df7548&amp;gt;] ? ptlrpc_wait_event+0x98/0x340 [ptlrpc]
[44339.829923]  [&amp;lt;ffffffff810c4832&amp;gt;] ? default_wake_function+0x12/0x20
[44339.829924]  [&amp;lt;ffffffff810ba598&amp;gt;] ? __wake_up_common+0x58/0x90
[44339.829949]  [&amp;lt;ffffffffc0dfe652&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
[44339.829973]  [&amp;lt;ffffffffc0dfdbc0&amp;gt;] ? ptlrpc_main+0x0/0x1e40 [ptlrpc]
[44339.829974]  [&amp;lt;ffffffff810b099f&amp;gt;] kthread+0xcf/0xe0
[44339.829975]  [&amp;lt;ffffffff810b08d0&amp;gt;] ? kthread+0x0/0xe0
[44339.829977]  [&amp;lt;ffffffff816b4fd8&amp;gt;] ret_from_fork+0x58/0x90
[44339.829978]  [&amp;lt;ffffffff810b08d0&amp;gt;] ? kthread+0x0/0xe0


[44343.925683] LNet: Service thread pid 103097 was inactive for 1202.37s. Watchdog stack traces are limited to 3 per 300 seconds, skipping this one.
[44343.925685] LNet: Skipped 3 previous similar messages
[44343.925687] LustreError: dumping log to /tmp/lustre-log.1513254464.103097
[44348.021604] LNet: Service thread pid 72214 was inactive for 1203.49s. Watchdog stack traces are limited to 3 per 300 seconds, skipping this one.
[44348.178864] LNet: Skipped 2 previous similar messages
[44348.240314] LustreError: dumping log to /tmp/lustre-log.1513254468.72214
[44771.642327] LNet: 5683:0:(o2iblnd_cb.c:3192:kiblnd_check_conns()) Timed out tx for 192.168.1.114@o2ib: 0 seconds
[44791.889635] Lustre: soaked-OST0005: Export ffff88074f3a3000 already connecting from 192.168.1.108@o2ib
[44792.001974] Lustre: Skipped 12 previous similar messages
[44929.515633] Lustre: 136835:0:(service.c:1346:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/5), not sending early reply
  req@ffff8804149d5450 x1586704105272848/t0(0) o4-&amp;gt;7df9a58e-df80-cd01-0d09-5737d3c03bbe@192.168.1.131@o2ib:250/0 lens 608/0 e 0 to 0 dl 1513255055 ref 2 fl New:/2/ffffffff rc 0/-1
[44929.613566] Lustre: 136835:0:(service.c:1346:ptlrpc_at_send_early_reply()) Skipped 795 previous similar messages
[44935.379406] Lustre: soaked-OST0005: Client soaked-MDT0003-mdtlov_UUID (at 192.168.1.111@o2ib) reconnecting
[44935.412601] Lustre: Skipped 77 previous similar messages
[44935.429973] Lustre: soaked-OST0005: Connection restored to 6943d9de-cea7-4a53-1661-77d616b076c6 (at 192.168.1.136@o2ib)
[44935.429975] Lustre: Skipped 79 previous similar messages
[44940.659687] gmain invoked oom-killer: gfp_mask=0x200da, order=0, oom_score_adj=0
[44940.685141] gmain cpuset=/ mems_allowed=0-1
[44940.699853] CPU: 10 PID: 2945 Comm: gmain Tainted: P           OE  ------------   3.10.0-693.5.2.el7_lustre.x86_64 #1
[44940.735748] Hardware name: Intel Corporation S2600GZ ........../S2600GZ, BIOS SE5C600.86B.01.08.0003.022620131521 02/26/2013
[44940.773670]  ffff8800b12d4f10 00000000b91ef903 ffff880411287988 ffffffff816a3e2d
[44940.799046]  ffff880411287a18 ffffffff8169f246 ffffffff810e93ac ffff880410fb97c0
[44940.824448]  ffff880410fb97d8 0000000000000202 0000000000000202 ffff880411287a08
[44940.849853] Call Trace:
[44940.858910]  [&amp;lt;ffffffff816a3e2d&amp;gt;] dump_stack+0x19/0x1b
[44940.876837]  [&amp;lt;ffffffff8169f246&amp;gt;] dump_header+0x90/0x229
[44940.895326]  [&amp;lt;ffffffff810e93ac&amp;gt;] ? ktime_get_ts64+0x4c/0xf0
[44940.914962]  [&amp;lt;ffffffff8113d37f&amp;gt;] ? delayacct_end+0x8f/0xb0
[44940.934307]  [&amp;lt;ffffffff811863a4&amp;gt;] oom_kill_process+0x254/0x3d0
[44940.954512]  [&amp;lt;ffffffff81185e4d&amp;gt;] ? oom_unkillable_task+0xcd/0x120
[44940.975861]  [&amp;lt;ffffffff81185ef6&amp;gt;] ? find_lock_task_mm+0x56/0xc0
[44940.996357]  [&amp;lt;ffffffff81186be6&amp;gt;] out_of_memory+0x4b6/0x4f0
[44941.015701]  [&amp;lt;ffffffff8169fd4a&amp;gt;] __alloc_pages_slowpath+0x5d6/0x724
[44941.037591]  [&amp;lt;ffffffff8118cdb5&amp;gt;] __alloc_pages_nodemask+0x405/0x420
[44941.059496]  [&amp;lt;ffffffff811d40a5&amp;gt;] alloc_pages_vma+0xb5/0x200
[44941.079116]  [&amp;lt;ffffffff811c44ad&amp;gt;] read_swap_cache_async+0xed/0x160
[44941.100448]  [&amp;lt;ffffffff811c45c8&amp;gt;] swapin_readahead+0xa8/0x110
[44941.120355]  [&amp;lt;ffffffff811b22cb&amp;gt;] handle_mm_fault+0xadb/0xfa0
[44941.140262]  [&amp;lt;ffffffff816b0074&amp;gt;] __do_page_fault+0x154/0x450
[44941.160165]  [&amp;lt;ffffffff816b03a5&amp;gt;] do_page_fault+0x35/0x90
[44941.178931]  [&amp;lt;ffffffff816ac5c8&amp;gt;] page_fault+0x28/0x30
[44941.196833] Mem-Info:
[44941.205292] active_anon:35 inactive_anon:0 isolated_anon:0
 active_file:0 inactive_file:80 isolated_file:0
 unevictable:6965 dirty:0 writeback:11 unstable:0
 slab_reclaimable:9803 slab_unreclaimable:234987
 mapped:1718 shmem:0 pagetables:1777 bounce:0
 free:39369 free_pcp:20 free_cma:0
[44941.317280] Node 0 DMA free:15576kB min:40kB low:48kB high:60kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15920kB managed:15836kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes
[44941.457405] lowmem_reserve[]: 0 2580 15620 15620
[44941.473633] Node 0 DMA32 free:59480kB min:7320kB low:9148kB high:10980kB active_anon:4kB inactive_anon:4kB active_file:8kB inactive_file:4kB unevictable:568kB isolated(anon):0kB isolated(file):0kB present:3051628kB managed:2643828kB mlocked:568kB dirty:0kB writeback:0kB mapped:576kB shmem:0kB slab_reclaimable:2240kB slab_unreclaimable:59756kB kernel_stack:1056kB pagetables:360kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:118 all_unreclaimable? yes
[44941.623315] lowmem_reserve[]: 0 0 13040 13040
[44941.638694] Node 0 Normal free:36872kB min:37012kB low:46264kB high:55516kB active_anon:4kB inactive_anon:32kB active_file:0kB inactive_file:372kB unevictable:3804kB isolated(anon):0kB isolated(file):0kB present:13631488kB managed:13353036kB mlocked:3804kB dirty:0kB writeback:0kB mapped:4120kB shmem:0kB slab_reclaimable:16428kB slab_unreclaimable:312080kB kernel_stack:10832kB pagetables:3212kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:973 all_unreclaimable? yes
[44941.793202] lowmem_reserve[]: 0 0 0 0
[44941.806201] Node 1 Normal free:45552kB min:45728kB low:57160kB high:68592kB active_anon:140kB inactive_anon:0kB active_file:44kB inactive_file:184kB unevictable:23488kB isolated(anon):0kB isolated(file):0kB present:16777216kB managed:16498508kB mlocked:23488kB dirty:0kB writeback:44kB mapped:2180kB shmem:0kB slab_reclaimable:20544kB slab_unreclaimable:568112kB kernel_stack:19024kB pagetables:3536kB unstable:0kB bounce:0kB free_pcp:4kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:1148 all_unreclaimable? yes
[44941.962556] lowmem_reserve[]: 0 0 0 0
[44941.975591] Node 0 DMA: 0*4kB 1*8kB (U) 1*16kB (U) 0*32kB 1*64kB (U) 1*128kB (U) 0*256kB 0*512kB 1*1024kB (U) 1*2048kB (M) 3*4096kB (M) = 15576kB
[44942.020723] Node 0 DMA32: 400*4kB (UEM) 615*8kB (UEM) 478*16kB (UEM) 418*32kB (UEM) 172*64kB (UEM) 22*128kB (EM) 11*256kB (M) 6*512kB (UM) 4*1024kB (UM) 4*2048kB (M) 0*4096kB = 59544kB
[44942.077164] Node 0 Normal: 183*4kB (UEM) 883*8kB (UEM) 494*16kB (UEM) 417*32kB (UEM) 118*64kB (UEM) 10*128kB (M) 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 37876kB
[44942.128198] Node 1 Normal: 1054*4kB (UEM) 729*8kB (UEM) 488*16kB (UEM) 382*32kB (UEM) 265*64kB (UEM) 13*128kB (UM) 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 48704kB
[44942.179875] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=1048576kB
[44942.209631] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
[44942.238520] Node 1 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=1048576kB
[44942.268263] Node 1 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
[44942.297135] 2053 total pagecache pages
[44942.310568] 0 pages in swap cache
[44942.322563] Swap cache stats: add 69725, delete 69725, find 14748/19926
[44942.345438] Free swap  = 16207300kB
[44942.358023] Total swap = 16319484kB

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>lustre-reviews-ib #3037</environment>
        <key id="49755">LU-10393</key>
            <summary>OSS - OSS OOM during recovery</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="sarah">Sarah Liu</reporter>
                        <labels>
                            <label>soak</label>
                            <label>zfs</label>
                    </labels>
                <created>Thu, 14 Dec 2017 22:39:26 +0000</created>
                <updated>Tue, 16 Jan 2018 14:13:39 +0000</updated>
                                            <version>Lustre 2.10.2</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>1</watches>
                                                                                    <attachments>
                            <attachment id="28945" name="vmcore-dmesg.txt" size="872224" author="sarah" created="Thu, 14 Dec 2017 22:42:58 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzpif:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>