<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:25:27 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2468] MDS out of memory, blocked in ldlm_pools_shrink()</title>
                <link>https://jira.whamcloud.com/browse/LU-2468</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Andreas Dilger &amp;lt;adilger@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/35a3873c-4166-11e2-af91-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/35a3873c-4166-11e2-af91-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_24a failed with the following error on the MDS console:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;br/&gt;
11:14:17:Lustre: DEBUG MARKER: == conf-sanity test 24a: Multiple MDTs on a single node == 11:13:50 (1355166830)&lt;br/&gt;
11:14:17:Lustre: DEBUG MARKER: grep -c /mnt/fs2mds&apos; &apos; /proc/mounts&lt;br/&gt;
11:14:17:Lustre: DEBUG MARKER: lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp; lctl dl | grep &apos; ST &apos;&lt;br/&gt;
11:14:17:Lustre: DEBUG MARKER: mkfs.lustre --mgsnode=client-29vm3@tcp --fsname=lustre --mdt --index=0 --param=sys.timeout=20 --param=lov.stripesize=1048576 --param=lov.stripecount=0 --param=mdt.identity_upcall=/usr/sbin/l_getidentity --backfstype=ldiskfs --device-size=2097152 --mkfsopt&lt;br/&gt;
11:14:17:LDISKFS-fs (dm-1): mounted filesystem with ordered data mode. quota=on. Opts: &lt;br/&gt;
11:14:17:Lustre: DEBUG MARKER: mkdir -p /mnt/mds1&lt;br/&gt;
11:14:17:Lustre: DEBUG MARKER: test -b /dev/lvm-MDS/P1&lt;br/&gt;
11:14:17:Lustre: DEBUG MARKER: mkdir -p /mnt/mds1; mount -t lustre -o user_xattr,acl  		                   /dev/lvm-MDS/P1 /mnt/mds1&lt;br/&gt;
11:14:17:LDISKFS-fs (dm-0): mounted filesystem with ordered data mode. quota=on. Opts: &lt;br/&gt;
11:14:17:Lustre: MGC10.10.4.174@tcp: Reactivating import&lt;br/&gt;
11:14:17:Lustre: lustre-MDT0000: used disk, loading&lt;br/&gt;
11:14:17:__ratelimit: 582 callbacks suppressed&lt;br/&gt;
11:14:17:cannot allocate a tage (0)&lt;br/&gt;
11:14:17:cannot allocate a tage (0)&lt;br/&gt;
11:14:17:cannot allocate a tage (0)&lt;br/&gt;
11:14:17:cannot allocate a tage (0)&lt;br/&gt;
11:14:17:cannot allocate a tage (0)&lt;br/&gt;
11:14:17:cannot allocate a tage (0)&lt;br/&gt;
11:14:17:cannot allocate a tage (0)&lt;/p&gt;

&lt;p&gt;11:15:55:LNet: Service thread pid 16764 was inactive for 40.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:&lt;br/&gt;
11:15:55:Pid: 16764, comm: mdt01_001&lt;br/&gt;
11:15:55:&lt;br/&gt;
11:15:55:Call Trace:&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e19d97&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_hash_bd_lookup_intent+0x37/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff814ffa2e&amp;gt;&amp;#93;&lt;/span&gt; ? __mutex_lock_slowpath+0x13e/0x180&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa051a691&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_cli_pool_shrink+0x71/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e14591&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x41/0x50 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff814ff8cb&amp;gt;&amp;#93;&lt;/span&gt; ? mutex_lock+0x2b/0x50&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0568e40&amp;gt;&amp;#93;&lt;/span&gt; ? enc_pools_shrink+0x3d0/0x560 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05198c3&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_pools_srv_shrink+0x13/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8112d34a&amp;gt;&amp;#93;&lt;/span&gt; ? shrink_slab+0x8a/0x1a0&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8112f36f&amp;gt;&amp;#93;&lt;/span&gt; ? do_try_to_free_pages+0x2ff/0x520&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100bc0e&amp;gt;&amp;#93;&lt;/span&gt; ? apic_timer_interrupt+0xe/0x20&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8112f77d&amp;gt;&amp;#93;&lt;/span&gt; ? try_to_free_pages+0x9d/0x130&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811308d0&amp;gt;&amp;#93;&lt;/span&gt; ? isolate_pages_global+0x0/0x350&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8112758d&amp;gt;&amp;#93;&lt;/span&gt; ? __alloc_pages_nodemask+0x40d/0x940&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81162372&amp;gt;&amp;#93;&lt;/span&gt; ? kmem_getpages+0x62/0x170&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81162f8a&amp;gt;&amp;#93;&lt;/span&gt; ? fallback_alloc+0x1ba/0x270&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811629df&amp;gt;&amp;#93;&lt;/span&gt; ? cache_grow+0x2cf/0x320&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81162d09&amp;gt;&amp;#93;&lt;/span&gt; ? ____cache_alloc_node+0x99/0x160&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81163aeb&amp;gt;&amp;#93;&lt;/span&gt; ? kmem_cache_alloc+0x11b/0x190&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e04af2&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_mem_cache_alloc+0x22/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa071b00a&amp;gt;&amp;#93;&lt;/span&gt; ? osc_session_init+0x3a/0x200 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ee0baf&amp;gt;&amp;#93;&lt;/span&gt; ? keys_fill+0x6f/0x1a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ee499b&amp;gt;&amp;#93;&lt;/span&gt; ? lu_context_init+0xab/0x260 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0542db4&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_server_handle_request+0x194/0xe00 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e0465e&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_timer_arm+0xe/0x10 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e160ef&amp;gt;&amp;#93;&lt;/span&gt; ? lc_watchdog_touch+0x6f/0x180 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa053a429&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_wait_event+0xa9/0x290 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e14591&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x41/0x50 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81053463&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up+0x53/0x70&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05445d5&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0xbb5/0x1970 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:15:55: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0543a20&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1970 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;11:31:32:INFO: task mount.lustre:17152 blocked for more than 120 seconds.&lt;br/&gt;
11:31:32:mount.lustre  D 0000000000000000     0 17152      1 0x00100084&lt;br/&gt;
11:31:32: ffff88001e843698 0000000000000082 0000000000000000 ffffffff81060af3&lt;br/&gt;
11:31:32: 0000000050c63493 0000000000000282 0000000000800500 0000000000000000&lt;br/&gt;
11:31:32: ffff88001a3e3098 ffff88001e843fd8 000000000000fb88 ffff88001a3e3098&lt;br/&gt;
11:31:32:Call Trace:&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81060af3&amp;gt;&amp;#93;&lt;/span&gt; ? wake_up_new_task+0xd3/0x120&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff814ff245&amp;gt;&amp;#93;&lt;/span&gt; schedule_timeout+0x215/0x2e0&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e840b0&amp;gt;&amp;#93;&lt;/span&gt; ? llog_process_thread_daemonize+0x0/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c0e2&amp;gt;&amp;#93;&lt;/span&gt; ? kernel_thread+0x82/0xe0&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e840b0&amp;gt;&amp;#93;&lt;/span&gt; ? llog_process_thread_daemonize+0x0/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff814feec3&amp;gt;&amp;#93;&lt;/span&gt; wait_for_common+0x123/0x180&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810602c0&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e0b77a&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_create_thread+0x7a/0xa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ec8650&amp;gt;&amp;#93;&lt;/span&gt; ? class_config_llog_handler+0x0/0x1850 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff814fefdd&amp;gt;&amp;#93;&lt;/span&gt; wait_for_completion+0x1d/0x20&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e85ae3&amp;gt;&amp;#93;&lt;/span&gt; llog_process_or_fork+0x333/0x660 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e85e24&amp;gt;&amp;#93;&lt;/span&gt; llog_process+0x14/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ebdd64&amp;gt;&amp;#93;&lt;/span&gt; class_config_parse_llog+0x1e4/0x340 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0821ced&amp;gt;&amp;#93;&lt;/span&gt; mgc_process_cfg_log+0x5cd/0x1600 &lt;span class=&quot;error&quot;&gt;&amp;#91;mgc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0823163&amp;gt;&amp;#93;&lt;/span&gt; mgc_process_log+0x443/0x1350 &lt;span class=&quot;error&quot;&gt;&amp;#91;mgc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e14591&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x41/0x50 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e14591&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x41/0x50 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa081da80&amp;gt;&amp;#93;&lt;/span&gt; ? mgc_blocking_ast+0x0/0x780 &lt;span class=&quot;error&quot;&gt;&amp;#91;mgc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0509930&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_completion_ast+0x0/0x980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0825974&amp;gt;&amp;#93;&lt;/span&gt; mgc_process_config+0x594/0xee0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mgc&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ecfc1c&amp;gt;&amp;#93;&lt;/span&gt; lustre_process_log+0x25c/0xad0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e14591&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x41/0x50 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e0f028&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_log_return+0x28/0x40 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ed1691&amp;gt;&amp;#93;&lt;/span&gt; ? server_register_mount+0x551/0x8f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0edd607&amp;gt;&amp;#93;&lt;/span&gt; server_start_targets+0x5c7/0x18f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e0f028&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_log_return+0x28/0x40 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ed86a0&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_start_mgc+0x4e0/0x1bc0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e14591&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x41/0x50 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ec8650&amp;gt;&amp;#93;&lt;/span&gt; ? class_config_llog_handler+0x0/0x1850 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0edfd87&amp;gt;&amp;#93;&lt;/span&gt; lustre_fill_super+0x1457/0x1b00 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117d200&amp;gt;&amp;#93;&lt;/span&gt; ? set_anon_super+0x0/0x100&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ede930&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_fill_super+0x0/0x1b00 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117e66f&amp;gt;&amp;#93;&lt;/span&gt; get_sb_nodev+0x5f/0xa0&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0ec9fa5&amp;gt;&amp;#93;&lt;/span&gt; lustre_get_sb+0x25/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117e2cb&amp;gt;&amp;#93;&lt;/span&gt; vfs_kern_mount+0x7b/0x1b0&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8117e472&amp;gt;&amp;#93;&lt;/span&gt; do_kern_mount+0x52/0x130&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8119cb42&amp;gt;&amp;#93;&lt;/span&gt; do_mount+0x2d2/0x8d0&lt;br/&gt;
11:31:32: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8119d1d0&amp;gt;&amp;#93;&lt;/span&gt; sys_mount+0x90/0xe0&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;It is likely that this is only a symptom of something else consuming memory on the MDS, and not the root cause.&lt;/p&gt;

&lt;p&gt;Info required for matching: conf-sanity 24a&lt;/p&gt;</description>
                <environment></environment>
        <key id="16899">LU-2468</key>
            <summary>MDS out of memory, blocked in ldlm_pools_shrink()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="sarah">Sarah Liu</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Tue, 11 Dec 2012 14:01:52 +0000</created>
                <updated>Wed, 28 Oct 2015 20:44:01 +0000</updated>
                            <resolved>Wed, 28 Oct 2015 20:44:01 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>16</watches>
                                                                            <comments>
                            <comment id="49081" author="green" created="Tue, 11 Dec 2012 18:12:22 +0000"  >&lt;p&gt;It looks like we have some low-memory deadlock between ldlm pool thread and something else. And in addition something started to use more RAM of course.&lt;/p&gt;

&lt;p&gt;her&apos;s my report from tonight&apos;s testing, OOM suddenly started in the middle of sanity test60, which is a kernel-module test and should not be leaking anything I would imagine:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;PID: 11332  TASK: ffff880026c964c0  CPU: 1   COMMAND: &quot;mdt00_004&quot;
 #0 [ffff8800104837d0] schedule at ffffffff814f7c98
 #1 [ffff880010483828] _spin_lock at ffffffff814fafde
 #2 [ffff880010483908] mutex_lock at ffffffff814f97e1
 #3 [ffff880010483928] ldlm_pools_shrink at ffffffffa0ef515c [ptlrpc]
 #4 [ffff880010483978] ldlm_pools_cli_shrink at ffffffffa0ef5423 [ptlrpc]
 #5 [ffff880010483988] shrink_slab at ffffffff8112b5aa
 #6 [ffff8800104839e8] do_try_to_free_pages at ffffffff8112d5cf
 #7 [ffff880010483a78] try_to_free_pages at ffffffff8112d9dd
 #8 [ffff880010483b28] __alloc_pages_nodemask at ffffffff8112570c
 #9 [ffff880010483c38] kmem_getpages at ffffffff81160a62
#10 [ffff880010483c68] fallback_alloc at ffffffff8116349c
#11 [ffff880010483ce8] ____cache_alloc_node at ffffffff81163188
#12 [ffff880010483d58] __kmalloc at ffffffff81163838
#13 [ffff880010483da8] cfs_alloc at ffffffffa08f7c10 [libcfs]
#14 [ffff880010483dd8] ptlrpc_at_check_timed at ffffffffa0f1e361 [ptlrpc]
#15 [ffff880010483e98] ptlrpc_main at ffffffffa0f1fd30 [ptlrpc]
#16 [ffff880010483f48] kernel_thread at ffffffff8100c14a

PID: 5875   TASK: ffff880098692400  CPU: 2   COMMAND: &quot;lc_watchdogd&quot;
 #0 [ffff88001d9ab600] schedule at ffffffff814f7c98
 #1 [ffff88001d9ab678] cfs_hash_bd_lookup_intent at ffffffffa090ce97 [libcfs]
 #2 [ffff88001d9ab6d8] _spin_lock at ffffffff814fafde
 #3 [ffff88001d9ab6e8] cfs_hash_spin_lock at ffffffffa090d89e [libcfs]
 #4 [ffff88001d9ab6f8] lu_site_stats_get at ffffffffa0dd5b70 [obdclass]
 #5 [ffff88001d9ab758] lu_cache_shrink at ffffffffa0dd782d [obdclass]
 #6 [ffff88001d9ab7d8] shrink_slab at ffffffff8112b5aa
 #7 [ffff88001d9ab838] do_try_to_free_pages at ffffffff8112d5cf
 #8 [ffff88001d9ab8c8] try_to_free_pages at ffffffff8112d9dd
 #9 [ffff88001d9ab978] __alloc_pages_nodemask at ffffffff8112570c
#10 [ffff88001d9aba88] kmem_getpages at ffffffff81160a62
#11 [ffff88001d9abab8] fallback_alloc at ffffffff8116349c
#12 [ffff88001d9abb38] ____cache_alloc_node at ffffffff81163188
#13 [ffff88001d9abba8] kmem_cache_alloc at ffffffff81163c6b
#14 [ffff88001d9abbf8] get_empty_filp at ffffffff8117c829
#15 [ffff88001d9abc18] do_filp_open at ffffffff8118bd35
#16 [ffff88001d9abd78] filp_open at ffffffff8118c88d
#17 [ffff88001d9abd88] cfs_filp_open at ffffffffa08fc434 [libcfs]
#18 [ffff88001d9abdb8] cfs_tracefile_dump_all_pages at ffffffffa09080f4 [libcfs]
#19 [ffff88001d9abe48] libcfs_debug_dumplog_internal at ffffffffa0902445 [libcfs]
#20 [ffff88001d9abe78] lc_watchdog_dumplog at ffffffffa09089a1 [libcfs]
#21 [ffff88001d9abe88] lcw_dispatch_main at ffffffffa090a0db [libcfs]
#22 [ffff88001d9abf48] kernel_thread at ffffffff8100c14a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I am not really sure how come spinlock comes after that path or why tasks with schedule as the last call sit on CPU (this is bt -a output in crash)&lt;/p&gt;</comment>
                            <comment id="49215" author="sarah" created="Thu, 13 Dec 2012 17:12:47 +0000"  >&lt;p&gt;another failure &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/92d0fe6c-44a9-11e2-8c8b-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/92d0fe6c-44a9-11e2-8c8b-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="49220" author="sarah" created="Thu, 13 Dec 2012 17:41:18 +0000"  >&lt;p&gt;another MDS OOM error seen in performance-sanity test:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/e6838dd8-44b1-11e2-8c8b-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/e6838dd8-44b1-11e2-8c8b-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;10:57:59:Lustre: DEBUG MARKER: == performance-sanity test 3: small file create/open/delete ======== 10:57:53 (1355252273)
10:57:59:Lustre: DEBUG MARKER: /usr/sbin/lctl mark ===== mdsrate-create-small.sh ====== 
10:57:59:Lustre: DEBUG MARKER: ===== mdsrate-create-small.sh ======
10:57:59:Lustre: DEBUG MARKER: running=$(grep -c /mnt/mds1&apos; &apos; /proc/mounts);
10:57:59:mpts=$(mount | grep -c /mnt/mds1&apos; &apos;);
10:57:59:if [ $running -ne $mpts ]; then
10:57:59:    echo $(hostname) env are INSANE!;
10:57:59:    exit 1;
10:57:59:fi
10:57:59:Lustre: DEBUG MARKER: running=$(grep -c /mnt/mds1&apos; &apos; /proc/mounts);
10:57:59:mpts=$(mount | grep -c /mnt/mds1&apos; &apos;);
10:57:59:if [ $running -ne $mpts ]; then
10:57:59:    echo $(hostname) env are INSANE!;
10:57:59:    exit 1;
10:57:59:fi
10:57:59:Lustre: DEBUG MARKER: e2label /dev/lvm-MDS/P1 2&amp;gt;/dev/null
10:58:00:Lustre: DEBUG MARKER: cat /proc/mounts
10:58:11:Lustre: DEBUG MARKER: lctl get_param -n timeout
10:58:11:Lustre: DEBUG MARKER: /usr/sbin/lctl mark Using TIMEOUT=20
10:58:11:Lustre: DEBUG MARKER: Using TIMEOUT=20
10:58:11:Lustre: DEBUG MARKER: lctl dl | grep &apos; IN osc &apos; 2&amp;gt;/dev/null | wc -l
10:58:11:Lustre: DEBUG MARKER: /usr/sbin/lctl conf_param lustre.sys.jobid_var=procname_uid
10:58:11:Lustre: DEBUG MARKER: /usr/sbin/lctl get_param -n version
10:58:12:Lustre: DEBUG MARKER: /usr/sbin/lctl get_param -n osd-ldiskfs.lustre-MDT0000.quota_slave.enabled
10:58:12:Lustre: DEBUG MARKER: /usr/sbin/lctl conf_param lustre.quota.mdt=ug3
10:58:12:Lustre: DEBUG MARKER: /usr/sbin/lctl conf_param lustre.quota.ost=ug3
10:58:12:Lustre: DEBUG MARKER: PATH=/usr/lib64/lustre/tests:/usr/lib/lustre/tests:/usr/lib64/lustre/tests:/opt/iozone/bin:/opt/iozone/bin:/opt/iozone/bin:/usr/lib64/lustre/tests/mpi:/usr/lib64/lustre/tests/racer:/usr/lib64/lustre/../lustre-iokit/sgpdd-survey:/usr/lib64/lustre/tests:/us
10:58:12:__ratelimit: 1 callbacks suppressed
10:58:12:cannot allocate a tage (259)
10:58:12:LNet: 12909:0:(debug.c:324:libcfs_debug_str2mask()) You are trying to use a numerical value for the mask - this will be deprecated in a future release.
10:58:12:LNet: 12909:0:(debug.c:324:libcfs_debug_str2mask()) Skipped 1 previous similar message
10:58:12:cannot allocate a tage (259)
10:58:12:Lustre: ctl-lustre-MDT0000: super-sequence allocation rc = 0 [0x0000000200000400-0x0000000240000400):0:mdt
10:58:12:cannot allocate a tage (259)
10:58:12:Lustre: DEBUG MARKER: /usr/sbin/lctl mark ===== mdsrate-create-small.sh ### 1 NODE CREATE ###
10:58:12:cannot allocate a tage (259)
10:58:12:Lustre: DEBUG MARKER: ===== mdsrate-create-small.sh
10:58:12:cannot allocate a tage (259)
10:58:12:cannot allocate a tage (259)
10:58:12:cannot allocate a tage (259)
10:58:12:cannot allocate a tage (259)
10:58:12:cannot allocate a tage (259)
10:58:12:cannot allocate a tage (259)
11:02:36:Lustre: DEBUG MARKER: /usr/sbin/lctl mark ===== mdsrate-create-small.sh ### 1 NODE UNLINK ###
11:02:37:Lustre: DEBUG MARKER: ===== mdsrate-create-small.sh
11:08:21:Lustre: DEBUG MARKER: /usr/sbin/lctl mark ===== mdsrate-create-small.sh ### 2 NODES CREATE with 3 threads per client ###
11:08:21:Lustre: DEBUG MARKER: ===== mdsrate-create-small.sh
11:09:42:__ratelimit: 12 callbacks suppressed
11:09:42:sssd_nss invoked oom-killer: gfp_mask=0x201da, order=0, oom_adj=0, oom_score_adj=0
11:09:42:sssd_nss cpuset=/ mems_allowed=0
11:09:42:Pid: 1831, comm: sssd_nss Not tainted 2.6.32-279.14.1.el6_lustre.x86_64 #1
11:09:42:Call Trace:
11:09:42: [&amp;lt;ffffffff810c4cd1&amp;gt;] ? cpuset_print_task_mems_allowed+0x91/0xb0
11:09:42: [&amp;lt;ffffffff81117440&amp;gt;] ? dump_header+0x90/0x1b0
11:09:42: [&amp;lt;ffffffff81500d07&amp;gt;] ? _spin_unlock_irqrestore+0x17/0x20
11:09:42: [&amp;lt;ffffffff811178c2&amp;gt;] ? oom_kill_process+0x82/0x2a0
11:09:42: [&amp;lt;ffffffff811177be&amp;gt;] ? select_bad_process+0x9e/0x120
11:09:42: [&amp;lt;ffffffff81117d00&amp;gt;] ? out_of_memory+0x220/0x3c0
11:09:42: [&amp;lt;ffffffff81127a1e&amp;gt;] ? __alloc_pages_nodemask+0x89e/0x940
11:09:42: [&amp;lt;ffffffff8115c7ea&amp;gt;] ? alloc_pages_current+0xaa/0x110
11:09:42: [&amp;lt;ffffffff81114847&amp;gt;] ? __page_cache_alloc+0x87/0x90
11:09:42: [&amp;lt;ffffffff8112a46b&amp;gt;] ? __do_page_cache_readahead+0xdb/0x210
11:09:42: [&amp;lt;ffffffff8112a5c1&amp;gt;] ? ra_submit+0x21/0x30
11:09:42: [&amp;lt;ffffffff81115b73&amp;gt;] ? filemap_fault+0x4c3/0x500
11:09:42: [&amp;lt;ffffffff8113ef74&amp;gt;] ? __do_fault+0x54/0x510
11:09:42: [&amp;lt;ffffffff8113f527&amp;gt;] ? handle_pte_fault+0xf7/0xb50
11:09:42: [&amp;lt;ffffffff811401ba&amp;gt;] ? handle_mm_fault+0x23a/0x310
11:09:42: [&amp;lt;ffffffff810444c9&amp;gt;] ? __do_page_fault+0x139/0x480
11:09:42: [&amp;lt;ffffffff81039688&amp;gt;] ? pvclock_clocksource_read+0x58/0xd0
11:09:42: [&amp;lt;ffffffff8103877c&amp;gt;] ? kvm_clock_read+0x1c/0x20
11:09:42: [&amp;lt;ffffffff81038789&amp;gt;] ? kvm_clock_get_cycles+0x9/0x10
11:09:42: [&amp;lt;ffffffff8109cb30&amp;gt;] ? getnstimeofday+0x60/0xf0
11:09:42: [&amp;lt;ffffffff81503e0e&amp;gt;] ? do_page_fault+0x3e/0xa0
11:09:42: [&amp;lt;ffffffff815011c5&amp;gt;] ? page_fault+0x25/0x30
11:09:43:Mem-Info:
11:09:43:Node 0 DMA per-cpu:
11:09:43:CPU    0: hi:    0, btch:   1 usd:   0
11:09:43:CPU    1: hi:    0, btch:   1 usd:   0
11:09:43:CPU    2: hi:    0, btch:   1 usd:   0
11:09:43:CPU    3: hi:    0, btch:   1 usd:   0
11:09:43:CPU    4: hi:    0, btch:   1 usd:   0
11:09:43:CPU    5: hi:    0, btch:   1 usd:   0
11:09:43:CPU    6: hi:    0, btch:   1 usd:   0
11:09:43:CPU    7: hi:    0, btch:   1 usd:   0
11:09:43:CPU    8: hi:    0, btch:   1 usd:   0
11:09:43:CPU    9: hi:    0, btch:   1 usd:   0
11:09:43:CPU   10: hi:    0, btch:   1 usd:   0
11:09:43:CPU   11: hi:    0, btch:   1 usd:   0
11:09:43:CPU   12: hi:    0, btch:   1 usd:   0
11:09:43:CPU   13: hi:    0, btch:   1 usd:   0
11:09:43:CPU   14: hi:    0, btch:   1 usd:   0
11:09:43:CPU   15: hi:    0, btch:   1 usd:   0
11:09:43:CPU   16: hi:    0, btch:   1 usd:   0
11:09:43:CPU   17: hi:    0, btch:   1 usd:   0
11:09:43:CPU   18: hi:    0, btch:   1 usd:   0
11:09:43:CPU   19: hi:    0, btch:   1 usd:   0
11:09:43:CPU   20: hi:    0, btch:   1 usd:   0
11:09:43:CPU   21: hi:    0, btch:   1 usd:   0
11:09:43:CPU   22: hi:    0, btch:   1 usd:   0
11:09:43:CPU   23: hi:    0, btch:   1 usd:   0
11:09:43:Node 0 DMA32 per-cpu:
11:09:43:CPU    0: hi:  186, btch:  31 usd:   0
11:09:43:CPU    1: hi:  186, btch:  31 usd:   0
11:09:43:CPU    2: hi:  186, btch:  31 usd:   0
11:09:43:CPU    3: hi:  186, btch:  31 usd:   0
11:09:43:CPU    4: hi:  186, btch:  31 usd:   0
11:09:43:CPU    5: hi:  186, btch:  31 usd:   0
11:09:43:CPU    6: hi:  186, btch:  31 usd:   0
11:09:43:CPU    7: hi:  186, btch:  31 usd:   0
11:09:43:CPU    8: hi:  186, btch:  31 usd:   0
11:09:43:CPU    9: hi:  186, btch:  31 usd:   0
11:09:43:CPU   10: hi:  186, btch:  31 usd:   0
11:09:43:CPU   11: hi:  186, btch:  31 usd:   0
11:09:43:CPU   12: hi:  186, btch:  31 usd:   0
11:09:43:CPU   13: hi:  186, btch:  31 usd:   0
11:09:43:CPU   14: hi:  186, btch:  31 usd:   0
11:09:43:CPU   15: hi:  186, btch:  31 usd:   0
11:09:43:CPU   16: hi:  186, btch:  31 usd:   0
11:09:43:CPU   17: hi:  186, btch:  31 usd:   0
11:09:43:CPU   18: hi:  186, btch:  31 usd:   0
11:09:43:CPU   19: hi:  186, btch:  31 usd:   0
11:09:43:CPU   20: hi:  186, btch:  31 usd:   0
11:09:43:CPU   21: hi:  186, btch:  31 usd:   0
11:09:43:CPU   22: hi:  186, btch:  31 usd:   0
11:09:43:CPU   23: hi:  186, btch:  31 usd:   0
11:09:43:active_anon:53 inactive_anon:100 isolated_anon:0
11:09:43: active_file:8727 inactive_file:8692 isolated_file:0
11:09:43: unevictable:0 dirty:0 writeback:0 unstable:0
11:09:43: free:13216 slab_reclaimable:33653 slab_unreclaimable:33437
11:09:43: mapped:36 shmem:0 pagetables:750 bounce:0
11:09:43:Node 0 DMA free:8352kB min:332kB low:412kB high:496kB active_anon:8kB inactive_anon:8kB active_file:640kB inactive_file:196kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15324kB mlocked:0kB dirty:0kB writeback:0kB mapped:4kB shmem:0kB slab_reclaimable:4852kB slab_unreclaimable:916kB kernel_stack:24kB pagetables:0kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:1284 all_unreclaimable? yes
11:09:43:lowmem_reserve[]: 0 2003 2003 2003
11:09:43:Node 0 DMA32 free:44512kB min:44720kB low:55900kB high:67080kB active_anon:204kB inactive_anon:392kB active_file:34268kB inactive_file:34572kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:2052060kB mlocked:0kB dirty:0kB writeback:0kB mapped:140kB shmem:0kB slab_reclaimable:129760kB slab_unreclaimable:132832kB kernel_stack:5624kB pagetables:3000kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:148106 all_unreclaimable? yes
11:09:43:lowmem_reserve[]: 0 0 0 0
11:09:43:Node 0 DMA: 147*4kB 124*8kB 68*16kB 28*32kB 9*64kB 3*128kB 1*256kB 3*512kB 0*1024kB 1*2048kB 0*4096kB = 8364kB
11:09:43:Node 0 DMA32: 3090*4kB 2086*8kB 673*16kB 90*32kB 2*64kB 1*128kB 1*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 46792kB
11:09:43:17779 total pagecache pages
11:09:43:38 pages in swap cache
11:09:43:Swap cache stats: add 4557, delete 4519, find 87/119
11:09:43:Free swap  = 4111556kB
11:09:43:Total swap = 4128760kB
11:09:43:524283 pages RAM
11:09:43:44357 pages reserved
11:09:43:17679 pages shared
11:09:43:437072 pages non-shared
11:09:43:[ pid ]   uid  tgid total_vm      rss cpu oom_adj oom_score_adj name
11:09:43:[  707]     0   707     2761        0  23     -17         -1000 udevd
11:09:43:[ 1740]     0  1740     6909        1   4     -17         -1000 auditd
11:09:43:[ 1756]     0  1756    62271        1   0       0             0 rsyslogd
11:09:43:[ 1785]     0  1785     2286        1   4       0             0 irqbalance
11:09:43:[ 1799]    32  1799     4743        1   4       0             0 rpcbind
11:09:43:[ 1811]     0  1811    45434       38  17       0             0 sssd
11:09:43:[ 1824]     0  1824    50962        4  13       0             0 sssd_be
11:09:43:[ 1829]    29  1829     6354        1   0       0             0 rpc.statd
11:09:43:[ 1831]     0  1831    42905       23   3       0             0 sssd_nss
11:09:43:[ 1832]     0  1832    42854        2   5       0             0 sssd_pam
11:09:43:[ 1980]    81  1980     5868        1   4       0             0 dbus-daemon
11:09:43:[ 2013]     0  2013     1019        0   5       0             0 acpid
11:09:43:[ 2022]    68  2022     6806        1   4       0             0 hald
11:09:43:[ 2023]     0  2023     4525        1   4       0             0 hald-runner
11:09:43:[ 2051]     0  2051     5054        1   0       0             0 hald-addon-inpu
11:09:43:[ 2061]    68  2061     4450        1   5       0             0 hald-addon-acpi
11:09:43:[ 2082]     0  2082   167751        1   0       0             0 automount
11:09:43:[ 2123]     0  2123    26826        0   9       0             0 rpc.rquotad
11:09:43:[ 2127]     0  2127     5413        0   1       0             0 rpc.mountd
11:09:43:[ 2176]     0  2176     6290        1   1       0             0 rpc.idmapd
11:09:43:[ 2201]     0  2201    16017        0   1     -17         -1000 sshd
11:09:43:[ 2209]     0  2209     5522        1   1       0             0 xinetd
11:09:43:[ 2217]    38  2217     7004        1   4       0             0 ntpd
11:09:43:[ 2233]     0  2233    22181       35   2       0             0 sendmail
11:09:43:[ 2241]    51  2241    19527        0   1       0             0 sendmail
11:09:43:[ 2263]     0  2263    27017        1   4       0             0 abrt-dump-oops
11:09:43:[ 2271]     0  2271    29301        8   4       0             0 crond
11:09:43:[ 2282]     0  2282     5363        0   0       0             0 atd
11:09:43:[ 2295]     0  2295     1018        1   0       0             0 agetty
11:09:43:[ 2297]     0  2297     1015        1   7       0             0 mingetty
11:09:43:[ 2299]     0  2299     1015        1  16       0             0 mingetty
11:09:43:[ 2301]     0  2301     1015        1   7       0             0 mingetty
11:09:43:[ 2303]     0  2303     1015        1  16       0             0 mingetty
11:09:43:[ 2305]     0  2305     2760        0   8     -17         -1000 udevd
11:09:43:[ 2306]     0  2306     1015        1   7       0             0 mingetty
11:09:43:[ 2307]     0  2307     2760        0  12     -17         -1000 udevd
11:09:43:[ 2309]     0  2309     1015        1  21       0             0 mingetty
11:09:43:[12989]     0 12989     4324        0   4       0             0 anacron
11:09:43:Out of memory: Kill process 1756 (rsyslogd) score 1 or sacrifice child
11:09:43:Killed process 1756, UID 0, (rsyslogd) total-vm:249084kB, anon-rss:0kB, file-rss:4kB
11:09:43:sssd_be invoked oom-killer: gfp_mask=0x200da, order=0, oom_adj=0, oom_score_adj=0
11:09:43:sssd_be cpuset=/ mems_allowed=0
11:09:43:Pid: 1824, comm: sssd_be Not tainted 2.6.32-279.14.1.el6_lustre.x86_64 #1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="49222" author="keith" created="Thu, 13 Dec 2012 18:04:50 +0000"  >&lt;p&gt;Failure Rate: 31.00% of last 100 executions &lt;span class=&quot;error&quot;&gt;&amp;#91;all branches&amp;#93;&lt;/span&gt; &lt;/p&gt;</comment>
                            <comment id="49716" author="pjones" created="Thu, 27 Dec 2012 09:32:46 +0000"  >&lt;p&gt;Bobijam could you please look into this one?&lt;/p&gt;</comment>
                            <comment id="49746" author="bobijam" created="Fri, 28 Dec 2012 04:28:18 +0000"  >&lt;p&gt;found a glitch along the ldlm_shrink path, ldlm_pool_shrink()&lt;del&gt;&amp;gt;ldlm_cli_pool_shrink()&lt;/del&gt;&amp;gt;ldlm_cancel_lru(ns, nr, LDLM_ASYNC, LDLM_CANCEL_SHRINK)&lt;del&gt;&amp;gt;ldlm_bl_to_thread_list(ns, NULL, &amp;amp;cancels, nr, LDLM_ASYNC)&lt;/del&gt;&amp;gt;ldlm_bl_to_thread(ns, NULL, cancels, nr, LDLM_ASYNC)-&amp;gt;OBD_ALLOC(blwi, sizeof(*blwi))&lt;/p&gt;

&lt;p&gt;Shrinker path should not allocate memory again. It&apos;s not the deadlock root cause, though. &lt;/p&gt;</comment>
                            <comment id="50098" author="niu" created="Tue, 8 Jan 2013 02:06:08 +0000"  >&lt;p&gt;In cfs_hash_bd_lookup_intent(), shouldn&apos;t we use cfs_hlist_for_each_safe() instead of cfs_hlist_for_each()? Seems the item is possible to be deleted from the list, that&apos;ll corrupt the list.&lt;/p&gt;</comment>
                            <comment id="50099" author="niu" created="Tue, 8 Jan 2013 02:29:59 +0000"  >&lt;p&gt;oops, the function (cfs_hash_bd_lookup_intent()) returns immediately after deletion, so that&apos;s fine.&lt;/p&gt;</comment>
                            <comment id="52668" author="yong.fan" created="Tue, 19 Feb 2013 01:21:04 +0000"  >&lt;p&gt;Another failure instance:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/b1455ce2-7a57-11e2-b5c8-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/b1455ce2-7a57-11e2-b5c8-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52702" author="adilger" created="Tue, 19 Feb 2013 14:17:08 +0000"  >&lt;p&gt;I pushed &lt;a href=&quot;http://review.whamcloud.com/5470&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5470&lt;/a&gt; for quieting the spurious &quot;cannot allocate a tage&quot; message, but this does not solve the root of the problem, which is that the MDS is consuming more and more memory.&lt;/p&gt;</comment>
                            <comment id="53896" author="jlevi" created="Wed, 13 Mar 2013 08:22:00 +0000"  >&lt;p&gt;Per discussions with BobiJam, lowering priority since &lt;a href=&quot;http://review.whamcloud.com/4940&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4940&lt;/a&gt; landed and believed to help. Will raise priority if further testing proves otherwise.&lt;/p&gt;</comment>
                            <comment id="54140" author="sarah" created="Fri, 15 Mar 2013 17:10:42 +0000"  >&lt;p&gt;Found OOM in the interop test between 2.3 server and the latest tag-2.3.62&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/422ac24e-8b95-11e2-abec-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/422ac24e-8b95-11e2-abec-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;MDS console:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00:03:57:Lustre: DEBUG MARKER: == conf-sanity test 24a: Multiple MDTs on a single node == 00:03:40 (1363071820)
00:03:57:Lustre: DEBUG MARKER: grep -c /mnt/fs2mds&apos; &apos; /proc/mounts
00:03:57:Lustre: DEBUG MARKER: lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp; lctl dl | grep &apos; ST &apos;
00:03:57:Lustre: DEBUG MARKER: mkfs.lustre --mgsnode=client-31vm3@tcp --fsname=lustre --mdt --index=0 --param=sys.timeout=20 --param=lov.stripesize=1048576 --param=lov.stripecount=0 --param=mdt.identity_upcall=/usr/sbin/l_getidentity --backfstype=ldiskfs --device-size=2097152 --mkfsopt
00:03:57:LDISKFS-fs (dm-1): mounted filesystem with ordered data mode. quota=off. Opts: 
00:03:57:Lustre: DEBUG MARKER: mkdir -p /mnt/mds1
00:03:57:Lustre: DEBUG MARKER: test -b /dev/lvm-MDS/P1
00:03:57:Lustre: DEBUG MARKER: mkdir -p /mnt/mds1; mount -t lustre -o user_xattr,acl  		                   /dev/lvm-MDS/P1 /mnt/mds1
00:03:57:LDISKFS-fs (dm-0): mounted filesystem with ordered data mode. quota=off. Opts: 
00:03:57:LDISKFS-fs (dm-0): mounted filesystem with ordered data mode. quota=off. Opts: 
00:03:57:Lustre: DEBUG MARKER: PATH=/usr/lib64/lustre/tests:/usr/lib/lustre/tests:/usr/lib64/lustre/tests:/opt/iozone/bin:/usr/lib64/lustre/tests//usr/lib64/lustre/tests:/usr/lib64/lustre/tests:/usr/lib64/lustre/tests/../utils:/opt/iozone/bin:/usr/lib64/lustre/tests/mpi:/usr/lib64/lust
00:03:57:Lustre: DEBUG MARKER: e2label /dev/lvm-MDS/P1 2&amp;gt;/dev/null
00:04:08:Lustre: 3472:0:(client.c:1917:ptlrpc_expire_one_request()) @@@ Request  sent has timed out for slow reply: [sent 1363071835/real 1363071835]  req@ffff880066563800 x1429282250294701/t0(0) o8-&amp;gt;lustre-OST0000-osc-MDT0000@10.10.4.191@tcp:28/4 lens 400/544 e 0 to 1 dl 1363071840 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
00:04:08:Lustre: 3472:0:(client.c:1917:ptlrpc_expire_one_request()) Skipped 12 previous similar messages
00:04:10:Lustre: DEBUG MARKER: mkdir -p /mnt/fs2mds
00:04:11:Lustre: DEBUG MARKER: test -b /dev/lvm-MDS/S1
00:04:12:Lustre: DEBUG MARKER: mkdir -p /mnt/fs2mds; mount -t lustre -o user_xattr,acl  		                   /dev/lvm-MDS/S1 /mnt/fs2mds
00:04:12:LDISKFS-fs (dm-1): mounted filesystem with ordered data mode. quota=off. Opts: 
00:04:12:LDISKFS-fs (dm-1): mounted filesystem with ordered data mode. quota=off. Opts: 
00:04:12:Lustre: Setting parameter test1234-MDT0000-mdtlov.lov.stripesize in log test1234-MDT0000
00:04:12:Lustre: Skipped 4 previous similar messages
00:04:12:Lustre: test1234-MDT0000: new disk, initializing
00:04:12:__ratelimit: 23 callbacks suppressed
00:04:12:llog_process_th invoked oom-killer: gfp_mask=0xd2, order=0, oom_adj=-17, oom_score_adj=0
00:04:12:llog_process_th cpuset=/ mems_allowed=0
00:04:12:Pid: 900, comm: llog_process_th Not tainted 2.6.32-279.5.1.el6_lustre.gb16fe80.x86_64 #1
00:04:12:Call Trace:
00:04:12: [&amp;lt;ffffffff810c4aa1&amp;gt;] ? cpuset_print_task_mems_allowed+0x91/0xb0
00:04:12: [&amp;lt;ffffffff81117210&amp;gt;] ? dump_header+0x90/0x1b0
00:04:12: [&amp;lt;ffffffff81117692&amp;gt;] ? oom_kill_process+0x82/0x2a0
00:04:12: [&amp;lt;ffffffff8111758e&amp;gt;] ? select_bad_process+0x9e/0x120
00:04:12: [&amp;lt;ffffffff81117ad0&amp;gt;] ? out_of_memory+0x220/0x3c0
00:04:12: [&amp;lt;ffffffff811277ee&amp;gt;] ? __alloc_pages_nodemask+0x89e/0x940
00:04:12: [&amp;lt;ffffffff8114d789&amp;gt;] ? __vmalloc_area_node+0xb9/0x190
00:04:12: [&amp;lt;ffffffffa04df9e0&amp;gt;] ? cfs_cpt_vmalloc+0x20/0x30 [libcfs]
00:04:12: [&amp;lt;ffffffff8114d65d&amp;gt;] ? __vmalloc_node+0xad/0x120
00:04:12: [&amp;lt;ffffffffa04df9e0&amp;gt;] ? cfs_cpt_vmalloc+0x20/0x30 [libcfs]
00:04:12: [&amp;lt;ffffffff8114d9a9&amp;gt;] ? vmalloc_node+0x29/0x30
00:04:12: [&amp;lt;ffffffffa04df9e0&amp;gt;] ? cfs_cpt_vmalloc+0x20/0x30 [libcfs]
00:04:12: [&amp;lt;ffffffffa07e726e&amp;gt;] ? ptlrpc_alloc_rqbd+0x13e/0x690 [ptlrpc]
00:04:12: [&amp;lt;ffffffffa07e7825&amp;gt;] ? ptlrpc_grow_req_bufs+0x65/0x1b0 [ptlrpc]
00:04:12: [&amp;lt;ffffffffa07e8e07&amp;gt;] ? ptlrpc_register_service+0x1497/0x1770 [ptlrpc]
00:04:12: [&amp;lt;ffffffffa0e38c62&amp;gt;] ? mdt_init0+0x1202/0x23f0 [mdt]
00:04:12: [&amp;lt;ffffffffa0e39f43&amp;gt;] ? mdt_device_alloc+0xf3/0x220 [mdt]
00:04:12: [&amp;lt;ffffffffa064b0d7&amp;gt;] ? obd_setup+0x1d7/0x2f0 [obdclass]
00:04:12: [&amp;lt;ffffffffa064b3f8&amp;gt;] ? class_setup+0x208/0x890 [obdclass]
00:04:12: [&amp;lt;ffffffffa065308c&amp;gt;] ? class_process_config+0xc0c/0x1c30 [obdclass]
00:04:12: [&amp;lt;ffffffffa04ea088&amp;gt;] ? libcfs_log_return+0x28/0x40 [libcfs]
00:04:12: [&amp;lt;ffffffffa064cef1&amp;gt;] ? lustre_cfg_new+0x391/0x7e0 [obdclass]
00:04:12: [&amp;lt;ffffffffa065515b&amp;gt;] ? class_config_llog_handler+0x9bb/0x1610 [obdclass]
00:04:12: [&amp;lt;ffffffffa061e1f8&amp;gt;] ? llog_process_thread+0x888/0xd00 [obdclass]
00:04:12: [&amp;lt;ffffffffa061d970&amp;gt;] ? llog_process_thread+0x0/0xd00 [obdclass]
00:04:12: [&amp;lt;ffffffff8100c14a&amp;gt;] ? child_rip+0xa/0x20
00:04:12: [&amp;lt;ffffffffa061d970&amp;gt;] ? llog_process_thread+0x0/0xd00 [obdclass]
00:04:12: [&amp;lt;ffffffffa061d970&amp;gt;] ? llog_process_thread+0x0/0xd00 [obdclass]
00:04:12: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
00:04:12:Mem-Info:
00:04:12:Node 0 DMA per-cpu:
00:04:12:CPU    0: hi:    0, btch:   1 usd:   0
00:04:12:CPU    1: hi:    0, btch:   1 usd:   0
00:04:12:CPU    2: hi:    0, btch:   1 usd:   0
00:04:12:CPU    3: hi:    0, btch:   1 usd:   0
00:04:12:CPU    4: hi:    0, btch:   1 usd:   0
00:04:12:CPU    5: hi:    0, btch:   1 usd:   0
00:04:12:CPU    6: hi:    0, btch:   1 usd:   0
00:04:12:CPU    7: hi:    0, btch:   1 usd:   0
00:04:12:CPU    8: hi:    0, btch:   1 usd:   0
00:04:12:CPU    9: hi:    0, btch:   1 usd:   0
00:04:12:CPU   10: hi:    0, btch:   1 usd:   0
00:04:12:CPU   11: hi:    0, btch:   1 usd:   0
00:04:12:CPU   12: hi:    0, btch:   1 usd:   0
00:04:12:CPU   13: hi:    0, btch:   1 usd:   0
00:04:12:CPU   14: hi:    0, btch:   1 usd:   0
00:04:12:CPU   15: hi:    0, btch:   1 usd:   0
00:04:12:CPU   16: hi:    0, btch:   1 usd:   0
00:04:12:CPU   17: hi:    0, btch:   1 usd:   0
00:04:12:CPU   18: hi:    0, btch:   1 usd:   0
00:04:12:CPU   19: hi:    0, btch:   1 usd:   0
00:04:12:CPU   20: hi:    0, btch:   1 usd:   0
00:04:12:CPU   21: hi:    0, btch:   1 usd:   0
00:04:12:CPU   22: hi:    0, btch:   1 usd:   0
00:04:13:CPU   23: hi:    0, btch:   1 usd:   0
00:04:13:Node 0 DMA32 per-cpu:
00:04:13:CPU    0: hi:  186, btch:  31 usd:  64
00:04:13:CPU    1: hi:  186, btch:  31 usd:   0
00:04:13:CPU    2: hi:  186, btch:  31 usd:   0
00:04:13:CPU    3: hi:  186, btch:  31 usd:   0
00:04:13:CPU    4: hi:  186, btch:  31 usd:   0
00:04:13:CPU    5: hi:  186, btch:  31 usd:   0
00:04:13:CPU    6: hi:  186, btch:  31 usd:   0
00:04:13:CPU    7: hi:  186, btch:  31 usd:   0
00:04:13:CPU    8: hi:  186, btch:  31 usd:   0
00:04:13:CPU    9: hi:  186, btch:  31 usd:   0
00:04:13:CPU   10: hi:  186, btch:  31 usd:   0
00:04:13:CPU   11: hi:  186, btch:  31 usd:   0
00:04:13:CPU   12: hi:  186, btch:  31 usd:   0
00:04:13:CPU   13: hi:  186, btch:  31 usd:   0
00:04:13:CPU   14: hi:  186, btch:  31 usd:   0
00:04:13:CPU   15: hi:  186, btch:  31 usd:   0
00:04:13:CPU   16: hi:  186, btch:  31 usd:   0
00:04:13:CPU   17: hi:  186, btch:  31 usd:   0
00:04:13:CPU   18: hi:  186, btch:  31 usd:   0
00:04:13:CPU   19: hi:  186, btch:  31 usd:   0
00:04:13:CPU   20: hi:  186, btch:  31 usd:   0
00:04:13:CPU   21: hi:  186, btch:  31 usd:   0
00:04:13:CPU   22: hi:  186, btch:  31 usd:   0
00:04:13:CPU   23: hi:  186, btch:  31 usd:  58
00:04:13:active_anon:3696 inactive_anon:1307 isolated_anon:0
00:04:13: active_file:105 inactive_file:47 isolated_file:0
00:04:13: unevictable:0 dirty:72 writeback:927 unstable:0
00:04:13: free:13329 slab_reclaimable:2530 slab_unreclaimable:28916
00:04:13: mapped:0 shmem:41 pagetables:818 bounce:0
00:04:13:Node 0 DMA free:8348kB min:332kB low:412kB high:496kB active_anon:0kB inactive_anon:8kB active_file:188kB inactive_file:196kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15324kB mlocked:0kB dirty:4kB writeback:208kB mapped:0kB shmem:0kB slab_reclaimable:176kB slab_unreclaimable:452kB kernel_stack:0kB pagetables:0kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:4552 all_unreclaimable? yes
00:04:13:lowmem_reserve[]: 0 2003 2003 2003
00:04:13:Node 0 DMA32 free:44968kB min:44720kB low:55900kB high:67080kB active_anon:14784kB inactive_anon:5220kB active_file:232kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:2052060kB mlocked:0kB dirty:284kB writeback:3500kB mapped:0kB shmem:164kB slab_reclaimable:9944kB slab_unreclaimable:115212kB kernel_stack:5760kB pagetables:3272kB unstable:0kB bounce:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? no
00:04:13:lowmem_reserve[]: 0 0 0 0
00:04:13:Node 0 DMA: 49*4kB 43*8kB 38*16kB 23*32kB 11*64kB 5*128kB 2*256kB 3*512kB 1*1024kB 1*2048kB 0*4096kB = 8348kB
00:04:13:Node 0 DMA32: 1692*4kB 1057*8kB 583*16kB 269*32kB 91*64kB 16*128kB 5*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 45896kB
00:04:13:1404 total pagecache pages
00:04:13:1080 pages in swap cache
00:04:13:Swap cache stats: add 1506, delete 426, find 0/0
00:04:13:Free swap  = 4122736kB
00:04:13:Total swap = 4128760kB
00:04:13:524283 pages RAM
00:04:13:44356 pages reserved
00:04:13:301 pages shared
00:04:13:455288 pages non-shared
00:04:13:[ pid ]   uid  tgid total_vm      rss cpu oom_adj oom_score_adj name
00:04:13:[  723]     0   723     2694       35  22     -17         -1000 udevd
00:04:13:[ 1779]     0  1779     6909       33   7     -17         -1000 auditd
00:04:13:[ 1795]     0  1795    62270      121   0       0             0 rsyslogd
00:04:13:[ 1824]     0  1824     2286       42  10       0             0 irqbalance
00:04:13:[ 1838]    32  1838     4743       17   1       0             0 rpcbind
00:04:13:[ 1856]    29  1856     5836        3   0       0             0 rpc.statd
00:04:13:[ 2005]    81  2005     5868       34   0       0             0 dbus-daemon
00:04:13:[ 2038]     0  2038     1019        0  13       0             0 acpid
00:04:13:[ 2047]    68  2047     6805      118   0       0             0 hald
00:04:13:[ 2048]     0  2048     4525        1   5       0             0 hald-runner
00:04:13:[ 2076]     0  2076     5054        1   0       0             0 hald-addon-inpu
00:04:13:[ 2083]    68  2083     4450        1   0       0             0 hald-addon-acpi
00:04:13:[ 2097]     0  2097    45434       37   0       0             0 sssd
00:04:13:[ 2100]     0  2100    50963      186   0       0             0 sssd_be
00:04:13:[ 2109]     0  2109    43037      114   1       0             0 sssd_nss
00:04:13:[ 2110]     0  2110    42892        1   2       0             0 sssd_pam
00:04:13:[ 2121]     0  2121   167752      323   0       0             0 automount
00:04:13:[ 2162]     0  2162    26826        1   5       0             0 rpc.rquotad
00:04:13:[ 2166]     0  2166     5413      144  11       0             0 rpc.mountd
00:04:13:[ 2215]     0  2215     6290       62   1       0             0 rpc.idmapd
00:04:13:[ 2258]   498  2258    56709      255   2       0             0 munged
00:04:13:[ 2273]     0  2273    16017      166   1     -17         -1000 sshd
00:04:13:[ 2281]     0  2281     5522       60   0       0             0 xinetd
00:04:13:[ 2289]    38  2289     7004      113   0       0             0 ntpd
00:04:13:[ 2305]     0  2305    22183      415   1       0             0 sendmail
00:04:13:[ 2313]    51  2313    19528      348   1       0             0 sendmail
00:04:13:[ 2335]     0  2335    27017       60   3       0             0 abrt-dump-oops
00:04:13:[ 2343]     0  2343    29303      152   6       0             0 crond
00:04:13:[ 2354]     0  2354     5363       45   0       0             0 atd
00:04:13:[ 2380]     0  2380     1018       23   0       0             0 agetty
00:04:13:[ 2381]     0  2381     1015       22   0       0             0 mingetty
00:04:13:[ 2383]     0  2383     1015       22   0       0             0 mingetty
00:04:13:[ 2385]     0  2385     1015       21   6       0             0 mingetty
00:04:14:[ 2387]     0  2387     1015       22   9       0             0 mingetty
00:04:15:[ 2389]     0  2389     1015       21   6       0             0 mingetty
00:04:15:[ 2390]     0  2390     2706       46   2     -17         -1000 udevd
00:04:15:[ 2391]     0  2391     2706       39   5     -17         -1000 udevd
00:04:17:[ 2393]     0  2393     1015       21   6       0             0 mingetty
00:04:17:[  806]     0   806    15354      173   0       0             0 in.mrshd
00:04:17:[  807]     0   807    26514       66  15       0             0 bash
00:04:17:[  828]     0   828    26514       66  16       0             0 bash
00:04:17:[  829]     0   829    26515       49  20       0             0 sh
00:04:17:[  831]     0   831    27935       39  22       0             0 mount
00:04:17:[  832]     0   832     2092       42   0       0             0 mount.lustre
00:04:18:Out of memory: Kill process 1795 (rsyslogd) score 1 or sacrifice child
00:04:18:Killed process 1795, UID 0, (rsyslogd) total-vm:249080kB, anon-rss:480kB, file-rss:4kB
00:04:18:llog_process_th invoked oom-killer: gfp_mask=0xd2, order=0, oom_adj=-17, oom_score_adj=0
00:04:18:llog_process_th cpuset=/ mems_allowed=0
00:04:18:Pid: 900, comm: llog_process_th Not tainted 2.6.32-279.5.1.el6_lustre.gb16fe80.x86_64 #1
00:04:19:Call Trace:
00:04:19: [&amp;lt;ffffffff810c4aa1&amp;gt;] ? cpuset_print_task_mems_allowed+0x91/0xb0
00:04:19: [&amp;lt;ffffffff81117210&amp;gt;] ? dump_header+0x90/0x1b0
00:04:19: [&amp;lt;ffffffff810e368e&amp;gt;] ? __delayacct_freepages_end+0x2e/0x30
00:04:19: [&amp;lt;ffffffff8121489c&amp;gt;] ? security_real_capable_noaudit+0x3c/0x70
00:04:19: [&amp;lt;ffffffff81117692&amp;gt;] ? oom_kill_process+0x82/0x2a0
00:04:20: [&amp;lt;ffffffff8111758e&amp;gt;] ? select_bad_process+0x9e/0x120
00:04:20: [&amp;lt;ffffffff81117ad0&amp;gt;] ? out_of_memory+0x220/0x3c0
00:04:20: [&amp;lt;ffffffff811277ee&amp;gt;] ? __alloc_pages_nodemask+0x89e/0x940
00:04:20: [&amp;lt;ffffffff8114d789&amp;gt;] ? __vmalloc_area_node+0xb9/0x190
00:04:20: [&amp;lt;ffffffffa04df9e0&amp;gt;] ? cfs_cpt_vmalloc+0x20/0x30 [libcfs]
00:04:20: [&amp;lt;ffffffff8114d65d&amp;gt;] ? __vmalloc_node+0xad/0x120
00:04:20: [&amp;lt;ffffffffa04df9e0&amp;gt;] ? cfs_cpt_vmalloc+0x20/0x30 [libcfs]
00:04:20: [&amp;lt;ffffffff8114d9a9&amp;gt;] ? vmalloc_node+0x29/0x30
00:04:20: [&amp;lt;ffffffffa04df9e0&amp;gt;] ? cfs_cpt_vmalloc+0x20/0x30 [libcfs]
00:04:20: [&amp;lt;ffffffffa07e726e&amp;gt;] ? ptlrpc_alloc_rqbd+0x13e/0x690 [ptlrpc]
00:04:20: [&amp;lt;ffffffffa07e7825&amp;gt;] ? ptlrpc_grow_req_bufs+0x65/0x1b0 [ptlrpc]
00:04:20: [&amp;lt;ffffffffa07e8e07&amp;gt;] ? ptlrpc_register_service+0x1497/0x1770 [ptlrpc]
00:04:20: [&amp;lt;ffffffffa0e38c62&amp;gt;] ? mdt_init0+0x1202/0x23f0 [mdt]
00:04:20: [&amp;lt;ffffffffa0e39f43&amp;gt;] ? mdt_device_alloc+0xf3/0x220 [mdt]
00:04:20: [&amp;lt;ffffffffa064b0d7&amp;gt;] ? obd_setup+0x1d7/0x2f0 [obdclass]
00:04:20: [&amp;lt;ffffffffa064b3f8&amp;gt;] ? class_setup+0x208/0x890 [obdclass]
00:04:20: [&amp;lt;ffffffffa065308c&amp;gt;] ? class_process_config+0xc0c/0x1c30 [obdclass]
00:04:20: [&amp;lt;ffffffffa04ea088&amp;gt;] ? libcfs_log_return+0x28/0x40 [libcfs]
00:04:20: [&amp;lt;ffffffffa064cef1&amp;gt;] ? lustre_cfg_new+0x391/0x7e0 [obdclass]
00:04:20: [&amp;lt;ffffffffa065515b&amp;gt;] ? class_config_llog_handler+0x9bb/0x1610 [obdclass]
00:04:20: [&amp;lt;ffffffffa061e1f8&amp;gt;] ? llog_process_thread+0x888/0xd00 [obdclass]
00:04:20: [&amp;lt;ffffffffa061d970&amp;gt;] ? llog_process_thread+0x0/0xd00 [obdclass]
00:04:20: [&amp;lt;ffffffff8100c14a&amp;gt;] ? child_rip+0xa/0x20
00:04:20: [&amp;lt;ffffffffa061d970&amp;gt;] ? llog_process_thread+0x0/0xd00 [obdclass]
00:04:20: [&amp;lt;ffffffffa061d970&amp;gt;] ? llog_process_thread+0x0/0xd00 [obdclass]
00:04:20: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;another failure: &lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/ebd2a076-8b97-11e2-abec-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/ebd2a076-8b97-11e2-abec-52540035b04c&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/89f10fd6-8b98-11e2-abec-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/89f10fd6-8b98-11e2-abec-52540035b04c&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/3a168224-8b99-11e2-abec-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/3a168224-8b99-11e2-abec-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="54274" author="sarah" created="Mon, 18 Mar 2013 16:58:43 +0000"  >&lt;p&gt;I think this should be raised in priority since it affected many interop tests&lt;/p&gt;</comment>
                            <comment id="54289" author="adilger" created="Mon, 18 Mar 2013 18:53:45 +0000"  >&lt;p&gt;Sarah to submit a patch to skip conf-sanity test_24a:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        [ $(lustre_version_code mds) -eq $(version_code 2.3.0) ] &amp;amp;&amp;amp;
                skip &lt;span class=&quot;code-quote&quot;&gt;&quot;skipping test &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; 2.3.0 MDS due to LU-2468&quot;&lt;/span&gt; &amp;amp;&amp;amp; &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="54384" author="sarah" created="Tue, 19 Mar 2013 16:41:06 +0000"  >&lt;p&gt;Andreas, got it, thanks!&lt;/p&gt;</comment>
                            <comment id="54636" author="liang" created="Fri, 22 Mar 2013 04:40:24 +0000"  >&lt;p&gt;I guess we didn&apos;t submit anything to 2.3, which means recent patches can&apos;t help for 2.3 MDS?&lt;/p&gt;</comment>
                            <comment id="56538" author="patrick.valentin" created="Thu, 18 Apr 2013 11:38:56 +0000"  >&lt;p&gt;A Bull customer with lustre 2.1.3 had the same hang on a lustre client (login node).&lt;br/&gt;
The system was hung and a dump was initiated from the BMC, by sending an NMI.&lt;br/&gt;
The dump shows there was no more activity on the system. The 12 CPUs are idle (swapper).&lt;br/&gt;
A lot of processes are in page_fault(), blocked in ldlm_pools_shrink().&lt;br/&gt;
I have attached the output of the &quot;foreach bt&quot; crash command. Let me know if you need the vmcore file.&lt;br/&gt;
I have asked the support team to provide more information on the frequency of this hang, the syslog, the activity when the hang occurred...&lt;/p&gt;</comment>
                            <comment id="65849" author="sebastien.buisson" created="Thu, 5 Sep 2013 15:49:16 +0000"  >&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;Do you have any news on this one?&lt;br/&gt;
This issue is occurring more and more often on various customer clusters.&lt;/p&gt;

&lt;p&gt;Each time, we can see a lot of OOM messages in the syslog of the dump files.&lt;/p&gt;

&lt;p&gt;TIA,&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="131923" author="adilger" created="Wed, 28 Oct 2015 20:44:01 +0000"  >&lt;p&gt;This was fixed for Lustre 2.4.0 by &lt;a href=&quot;http://review.whamcloud.com/4940&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4940&lt;/a&gt; landing.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="17341">LU-2703</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="26937">LU-5722</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="12544" name="UJF_crash_foreach_bt.gz" size="36465" author="lustre-bull" created="Thu, 18 Apr 2013 11:32:30 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvdmn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5814</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>