<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:45:24 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
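For instance, assuming this feed was fetched through the standard JIRA XML issue view
(an assumption; adjust to the URL you actually requested), the full request might be:
https://jira.whamcloud.com/si/jira.issueviews:issue-xml/LU-11613/LU-11613.xml?field=key&field=summary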
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11613] MDS and OSS locked up wait_transaction_locked+0x85/0xd0 [jbd2]</title>
                <link>https://jira.whamcloud.com/browse/LU-11613</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Hi&#160;&lt;/p&gt;

&lt;p&gt;We are experiencing MDS and OSS server deadlocks with high CPU usage; once the server is rebooted, the file system goes back to normal. Judging by the stack traces, this looks similar to&#160;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11465&quot; title=&quot;OSS/MDS deadlock in 2.10.5&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11465&quot;&gt;&lt;del&gt;LU-11465&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;Here are the stack traces from the MDS server.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Nov&#160; 3 19:02:14 mds rsyslogd: -- MARK --
Nov&#160; 3 19:07:07 mds kernel: [468204.667688] INFO: task jbd2/dm-0:5862 blocked for more than 120 seconds.
Nov&#160; 3 19:07:07 mds kernel: [468204.687998] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Nov&#160; 3 19:07:07 mds kernel: [468204.711686] jbd2/dm-0 &#160; &#160; &#160; D ffff883fcf614f10 &#160; &#160; 0&#160; 5862&#160; &#160; &#160; 2 0x00000080
Nov&#160; 3 19:07:07 mds kernel: [468204.711693] Call Trace:
Nov&#160; 3 19:07:07 mds kernel: [468204.719277]&#160; [&amp;lt;ffffffff81694cd9&amp;gt;] schedule+0x29/0x70
Nov&#160; 3 19:07:07 mds kernel: [468204.719303]&#160; [&amp;lt;ffffffffa01822b6&amp;gt;] jbd2_journal_commit_transaction+0x246/0x19a0 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.719309]&#160; [&amp;lt;ffffffff810cac9e&amp;gt;] ? account_entity_dequeue+0xae/0xd0
Nov&#160; 3 19:07:07 mds kernel: [468204.719312]&#160; [&amp;lt;ffffffff810ce77c&amp;gt;] ? dequeue_entity+0x11c/0x5e0
Nov&#160; 3 19:07:07 mds kernel: [468204.719315]&#160; [&amp;lt;ffffffff810c8185&amp;gt;] ? sched_clock_cpu+0x85/0xc0
Nov&#160; 3 19:07:07 mds kernel: [468204.719318]&#160; [&amp;lt;ffffffff8102954d&amp;gt;] ? __switch_to+0xcd/0x4b0
Nov&#160; 3 19:07:07 mds kernel: [468204.719322]&#160; [&amp;lt;ffffffff810b20a0&amp;gt;] ? wake_up_atomic_t+0x30/0x30
Nov&#160; 3 19:07:07 mds kernel: [468204.719324]&#160; [&amp;lt;ffffffff816946f7&amp;gt;] ? __schedule+0x477/0xa30
Nov&#160; 3 19:07:07 mds kernel: [468204.719328]&#160; [&amp;lt;ffffffff810998ee&amp;gt;] ? try_to_del_timer_sync+0x5e/0x90
Nov&#160; 3 19:07:07 mds kernel: [468204.719334]&#160; [&amp;lt;ffffffffa0188ab9&amp;gt;] kjournald2+0xc9/0x260 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.719336]&#160; [&amp;lt;ffffffff810b20a0&amp;gt;] ? wake_up_atomic_t+0x30/0x30
Nov&#160; 3 19:07:07 mds kernel: [468204.719341]&#160; [&amp;lt;ffffffffa01889f0&amp;gt;] ? commit_timeout+0x10/0x10 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.719343]&#160; [&amp;lt;ffffffff810b1131&amp;gt;] kthread+0xd1/0xe0
Nov&#160; 3 19:07:07 mds kernel: [468204.719345]&#160; [&amp;lt;ffffffff810b1060&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov&#160; 3 19:07:07 mds kernel: [468204.719347]&#160; [&amp;lt;ffffffff816a14f7&amp;gt;] ret_from_fork+0x77/0xb0
Nov&#160; 3 19:07:07 mds kernel: [468204.719349]&#160; [&amp;lt;ffffffff810b1060&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov&#160; 3 19:07:07 mds kernel: [468204.719351] INFO: task mdt00_002:5872 blocked for more than 120 seconds.
Nov&#160; 3 19:07:07 mds kernel: [468204.719352] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Nov&#160; 3 19:07:07 mds kernel: [468204.719354] mdt00_002 &#160; &#160; &#160; D ffff881fc0dd4f10 &#160; &#160; 0&#160; 5872&#160; &#160; &#160; 2 0x00000080
Nov&#160; 3 19:07:07 mds kernel: [468204.719354] Call Trace:
Nov&#160; 3 19:07:07 mds kernel: [468204.719356]&#160; [&amp;lt;ffffffff81694cd9&amp;gt;] schedule+0x29/0x70
Nov&#160; 3 19:07:07 mds kernel: [468204.719361]&#160; [&amp;lt;ffffffffa017f085&amp;gt;] wait_transaction_locked+0x85/0xd0 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.719362]&#160; [&amp;lt;ffffffff810b20a0&amp;gt;] ? wake_up_atomic_t+0x30/0x30
Nov&#160; 3 19:07:07 mds kernel: [468204.719367]&#160; [&amp;lt;ffffffffa017f368&amp;gt;] add_transaction_credits+0x268/0x2f0 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.719371]&#160; [&amp;lt;ffffffffa017f5e1&amp;gt;] start_this_handle+0x1a1/0x430 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.719385]&#160; [&amp;lt;ffffffffa1087fba&amp;gt;] ? osd_declare_write+0x1ea/0x450 [osd_ldiskfs]
Nov&#160; 3 19:07:07 mds kernel: [468204.719390]&#160; [&amp;lt;ffffffff811df622&amp;gt;] ? kmem_cache_alloc+0x1c2/0x1f0
Nov&#160; 3 19:07:07 mds kernel: [468204.719394]&#160; [&amp;lt;ffffffffa017fa93&amp;gt;] jbd2__journal_start+0xf3/0x1f0 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.719401]&#160; [&amp;lt;ffffffffa1069c8e&amp;gt;] ? osd_trans_start+0x1ae/0x460 [osd_ldiskfs]
Nov&#160; 3 19:07:07 mds kernel: [468204.719410]&#160; [&amp;lt;ffffffffa0fcb909&amp;gt;] __ldiskfs_journal_start_sb+0x69/0xe0 [ldiskfs]
Nov&#160; 3 19:07:07 mds kernel: [468204.719415]&#160; [&amp;lt;ffffffffa1069c8e&amp;gt;] osd_trans_start+0x1ae/0x460 [osd_ldiskfs]
Nov&#160; 3 19:07:07 mds kernel: [468204.719430]&#160; [&amp;lt;ffffffffa1232da7&amp;gt;] mdt_empty_transno+0xf7/0x840 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719439]&#160; [&amp;lt;ffffffffa1235ede&amp;gt;] mdt_mfd_open+0x8de/0xe70 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719446]&#160; [&amp;lt;ffffffffa120e012&amp;gt;] ? mdt_pack_acl2body+0x1b2/0x800 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719454]&#160; [&amp;lt;ffffffffa12369eb&amp;gt;] mdt_finish_open+0x57b/0x690 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719462]&#160; [&amp;lt;ffffffffa12382c8&amp;gt;] mdt_reint_open+0x17c8/0x3190 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719495]&#160; [&amp;lt;ffffffffa09b2311&amp;gt;] ? upcall_cache_get_entry+0x211/0x8f0 [obdclass]
Nov&#160; 3 19:07:07 mds kernel: [468204.719513]&#160; [&amp;lt;ffffffffa09b726e&amp;gt;] ? lu_ucred+0x1e/0x30 [obdclass]
Nov&#160; 3 19:07:07 mds kernel: [468204.719523]&#160; [&amp;lt;ffffffffa121d9e5&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719532]&#160; [&amp;lt;ffffffffa121e2b1&amp;gt;] ? mdt_root_squash+0x21/0x430 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719542]&#160; [&amp;lt;ffffffffa122daf3&amp;gt;] mdt_reint_rec+0x83/0x210 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719551]&#160; [&amp;lt;ffffffffa120f33b&amp;gt;] mdt_reint_internal+0x5fb/0x9c0 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719560]&#160; [&amp;lt;ffffffffa120f862&amp;gt;] mdt_intent_reint+0x162/0x430 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719569]&#160; [&amp;lt;ffffffffa121a631&amp;gt;] mdt_intent_policy+0x441/0xc70 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.719614]&#160; [&amp;lt;ffffffffa0b801db&amp;gt;] ? ldlm_resource_get+0xab/0xa60 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719640]&#160; [&amp;lt;ffffffffa0b792ba&amp;gt;] ldlm_lock_enqueue+0x38a/0x980 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719678]&#160; [&amp;lt;ffffffffa0ba2b53&amp;gt;] ldlm_handle_enqueue0+0x9d3/0x16a0 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719715]&#160; [&amp;lt;ffffffffa0bcada0&amp;gt;] ? lustre_swab_ldlm_lock_desc+0x30/0x30 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719761]&#160; [&amp;lt;ffffffffa0c28262&amp;gt;] tgt_enqueue+0x62/0x210 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719800]&#160; [&amp;lt;ffffffffa0c2beca&amp;gt;] tgt_request_handle+0x92a/0x1370 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719836]&#160; [&amp;lt;ffffffffa0bd44bb&amp;gt;] ptlrpc_server_handle_request+0x23b/0xaa0 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719872]&#160; [&amp;lt;ffffffffa0bd29b8&amp;gt;] ? ptlrpc_wait_event+0x98/0x340 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719877]&#160; [&amp;lt;ffffffff810c4d40&amp;gt;] ? wake_up_state+0x20/0x20
Nov&#160; 3 19:07:07 mds kernel: [468204.719911]&#160; [&amp;lt;ffffffffa0bd84a2&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719946]&#160; [&amp;lt;ffffffffa0bd7a10&amp;gt;] ? ptlrpc_register_service+0xe30/0xe30 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.719952]&#160; [&amp;lt;ffffffff810b1131&amp;gt;] kthread+0xd1/0xe0
Nov&#160; 3 19:07:07 mds kernel: [468204.719955]&#160; [&amp;lt;ffffffff810b1060&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov&#160; 3 19:07:07 mds kernel: [468204.719958]&#160; [&amp;lt;ffffffff816a14f7&amp;gt;] ret_from_fork+0x77/0xb0
Nov&#160; 3 19:07:07 mds kernel: [468204.719961]&#160; [&amp;lt;ffffffff810b1060&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov&#160; 3 19:07:07 mds kernel: [468204.719963] INFO: task mdt01_001:5874 blocked for more than 120 seconds.
Nov&#160; 3 19:07:07 mds kernel: [468204.719965] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Nov&#160; 3 19:07:07 mds kernel: [468204.719971] mdt01_001 &#160; &#160; &#160; D ffff881fc0dd0fd0 &#160; &#160; 0&#160; 5874&#160; &#160; &#160; 2 0x00000080
Nov&#160; 3 19:07:07 mds kernel: [468204.719974] Call Trace:
Nov&#160; 3 19:07:07 mds kernel: [468204.720005]&#160; [&amp;lt;ffffffffa0bae0a0&amp;gt;] ? ldlm_pool_add+0x80/0xf0 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.720010]&#160; [&amp;lt;ffffffff81694cd9&amp;gt;] schedule+0x29/0x70
Nov&#160; 3 19:07:07 mds kernel: [468204.720018]&#160; [&amp;lt;ffffffffa017f085&amp;gt;] wait_transaction_locked+0x85/0xd0 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.720022]&#160; [&amp;lt;ffffffff810b20a0&amp;gt;] ? wake_up_atomic_t+0x30/0x30
Nov&#160; 3 19:07:07 mds kernel: [468204.720028]&#160; [&amp;lt;ffffffffa017f368&amp;gt;] add_transaction_credits+0x268/0x2f0 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.720035]&#160; [&amp;lt;ffffffffa017f5e1&amp;gt;] start_this_handle+0x1a1/0x430 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.720048]&#160; [&amp;lt;ffffffffa1087fba&amp;gt;] ? osd_declare_write+0x1ea/0x450 [osd_ldiskfs]
Nov&#160; 3 19:07:07 mds kernel: [468204.720053]&#160; [&amp;lt;ffffffff811df622&amp;gt;] ? kmem_cache_alloc+0x1c2/0x1f0
Nov&#160; 3 19:07:07 mds kernel: [468204.720059]&#160; [&amp;lt;ffffffffa017fa93&amp;gt;] jbd2__journal_start+0xf3/0x1f0 [jbd2]
Nov&#160; 3 19:07:07 mds kernel: [468204.720068]&#160; [&amp;lt;ffffffffa1069c8e&amp;gt;] ? osd_trans_start+0x1ae/0x460 [osd_ldiskfs]
Nov&#160; 3 19:07:07 mds kernel: [468204.720078]&#160; [&amp;lt;ffffffffa0fcb909&amp;gt;] __ldiskfs_journal_start_sb+0x69/0xe0 [ldiskfs]
Nov&#160; 3 19:07:07 mds kernel: [468204.720088]&#160; [&amp;lt;ffffffffa1069c8e&amp;gt;] osd_trans_start+0x1ae/0x460 [osd_ldiskfs]
Nov&#160; 3 19:07:07 mds kernel: [468204.720100]&#160; [&amp;lt;ffffffffa1232da7&amp;gt;] mdt_empty_transno+0xf7/0x840 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720112]&#160; [&amp;lt;ffffffffa1235ede&amp;gt;] mdt_mfd_open+0x8de/0xe70 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720124]&#160; [&amp;lt;ffffffffa120e012&amp;gt;] ? mdt_pack_acl2body+0x1b2/0x800 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720136]&#160; [&amp;lt;ffffffffa12369eb&amp;gt;] mdt_finish_open+0x57b/0x690 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720149]&#160; [&amp;lt;ffffffffa12382c8&amp;gt;] mdt_reint_open+0x17c8/0x3190 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720170]&#160; [&amp;lt;ffffffffa09b2311&amp;gt;] ? upcall_cache_get_entry+0x211/0x8f0 [obdclass]
Nov&#160; 3 19:07:07 mds kernel: [468204.720190]&#160; [&amp;lt;ffffffffa09b726e&amp;gt;] ? lu_ucred+0x1e/0x30 [obdclass]
Nov&#160; 3 19:07:07 mds kernel: [468204.720202]&#160; [&amp;lt;ffffffffa121d9e5&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720214]&#160; [&amp;lt;ffffffffa121e2b1&amp;gt;] ? mdt_root_squash+0x21/0x430 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720227]&#160; [&amp;lt;ffffffffa122daf3&amp;gt;] mdt_reint_rec+0x83/0x210 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720239]&#160; [&amp;lt;ffffffffa120f33b&amp;gt;] mdt_reint_internal+0x5fb/0x9c0 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720249]&#160; [&amp;lt;ffffffffa120f862&amp;gt;] mdt_intent_reint+0x162/0x430 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720262]&#160; [&amp;lt;ffffffffa121a631&amp;gt;] mdt_intent_policy+0x441/0xc70 [mdt]
Nov&#160; 3 19:07:07 mds kernel: [468204.720291]&#160; [&amp;lt;ffffffffa0b801db&amp;gt;] ? ldlm_resource_get+0xab/0xa60 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.720319]&#160; [&amp;lt;ffffffffa0b792ba&amp;gt;] ldlm_lock_enqueue+0x38a/0x980 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.720350]&#160; [&amp;lt;ffffffffa0ba2b53&amp;gt;] ldlm_handle_enqueue0+0x9d3/0x16a0 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.720382]&#160; [&amp;lt;ffffffffa0bcada0&amp;gt;] ? lustre_swab_ldlm_lock_desc+0x30/0x30 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.720418]&#160; [&amp;lt;ffffffffa0c28262&amp;gt;] tgt_enqueue+0x62/0x210 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.720456]&#160; [&amp;lt;ffffffffa0c2beca&amp;gt;] tgt_request_handle+0x92a/0x1370 [ptlrpc]
Nov&#160; 3 19:07:07 mds kernel: [468204.720491]&#160; [&amp;lt;ffffffffa0bd44bb&amp;gt;] ptlrpc_server_handle_request+0x23b/0xaa0 [ptlrpc]
Nov&#160; 3 19:07:14 mds kernel: [468204.720525]&#160; [&amp;lt;ffffffffa0bd29b8&amp;gt;] ? ptlrpc_wait_event+0x98/0x340 [ptlrpc]
Nov&#160; 3 19:07:14 mds kernel: [468204.720529]&#160; [&amp;lt;ffffffff810c4d52&amp;gt;] ? default_wake_function+0x12/0x20
Nov&#160; 3 19:07:14 mds kernel: [468204.720531]&#160; [&amp;lt;ffffffff810bad2b&amp;gt;] ? __wake_up_common+0x5b/0x90
Nov&#160; 3 19:07:14 mds kernel: [468204.720564]&#160; [&amp;lt;ffffffffa0bd84a2&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
Nov&#160; 3 19:07:14 mds kernel: [468204.720597]&#160; [&amp;lt;ffffffffa0bd7a10&amp;gt;] ? ptlrpc_register_service+0xe30/0xe30 [ptlrpc]
Nov&#160; 3 19:07:14 mds kernel: [468204.720600]&#160; [&amp;lt;ffffffff810b1131&amp;gt;] kthread+0xd1/0xe0
Nov&#160; 3 19:07:14 mds kernel: [468204.720602]&#160; [&amp;lt;ffffffff810b1060&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov&#160; 3 19:07:14 mds kernel: [468204.720603]&#160; [&amp;lt;ffffffff816a14f7&amp;gt;] ret_from_fork+0x77/0xb0
Nov&#160; 3 19:07:14 mds kernel: [468204.720605]&#160; [&amp;lt;ffffffff810b1060&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov&#160; 3 19:07:14 mds kernel: [468204.720609] INFO: task osp-pre-141-0:5909 blocked for more than 120 seconds.
Nov&#160; 3 19:07:14 mds kernel: [468204.720610] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Nov&#160; 3 19:07:14 mds kernel: [468204.720612] osp-pre-141-0 &#160; D ffff881fd0621fa0 &#160; &#160; 0&#160; 5909&#160; &#160; &#160; 2 0x00000080
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Here are the stack traces from the OSS server.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Nov&#160; 3 19:08:40 oss kernel: [469273.871420] Pid: 85809, comm: ll_ost_io01_185 3.10.0-693.21.1.el7.20180508.x86_64.lustre2105 #1 SMP Mon Aug 27 23:04:41 UTC 2018
Nov&#160; 3 19:08:40 oss kernel: [469273.871421] Call Trace:
Nov&#160; 3 19:08:40 oss kernel: [469273.871450]&#160; [&amp;lt;ffffffffa0179085&amp;gt;] wait_transaction_locked+0x85/0xd0 [jbd2]
Nov&#160; 3 19:08:40 oss kernel: [469273.892337]&#160; [&amp;lt;ffffffffa0179368&amp;gt;] add_transaction_credits+0x268/0x2f0 [jbd2]
Nov&#160; 3 19:08:40 oss kernel: [469273.892340]&#160; [&amp;lt;ffffffffa01795e1&amp;gt;] start_this_handle+0x1a1/0x430 [jbd2]
Nov&#160; 3 19:08:40 oss kernel: [469273.892344]&#160; [&amp;lt;ffffffffa0179a93&amp;gt;] jbd2__journal_start+0xf3/0x1f0 [jbd2]
Nov&#160; 3 19:08:40 oss kernel: [469273.892358]&#160; [&amp;lt;ffffffffa14a7909&amp;gt;] __ldiskfs_journal_start_sb+0x69/0xe0 [ldiskfs]
Nov&#160; 3 19:08:40 oss kernel: [469273.892370]&#160; [&amp;lt;ffffffffa108dc8e&amp;gt;] osd_trans_start+0x1ae/0x460 [osd_ldiskfs]
Nov&#160; 3 19:08:40 oss kernel: [469273.892378]&#160; [&amp;lt;ffffffffa111127e&amp;gt;] ofd_trans_start+0x6e/0xf0 [ofd]
Nov&#160; 3 19:08:40 oss kernel: [469273.892383]&#160; [&amp;lt;ffffffffa111772b&amp;gt;] ofd_commitrw_write+0x94b/0x1c90 [ofd]
Nov&#160; 3 19:08:40 oss kernel: [469273.892394]&#160; [&amp;lt;ffffffffa111b779&amp;gt;] ofd_commitrw+0x4c9/0xae0 [ofd]
Nov&#160; 3 19:08:40 oss kernel: [469273.892456]&#160; [&amp;lt;ffffffffa0c52510&amp;gt;] obd_commitrw+0x2f3/0x336 [ptlrpc]
Nov&#160; 3 19:08:40 oss kernel: [469273.892504]&#160; [&amp;lt;ffffffffa0c25145&amp;gt;] tgt_brw_write+0xfb5/0x1780 [ptlrpc]
Nov&#160; 3 19:08:40 oss kernel: [469273.892539]&#160; [&amp;lt;ffffffffa0c20eca&amp;gt;] tgt_request_handle+0x92a/0x1370 [ptlrpc]
Nov&#160; 3 19:08:40 oss kernel: [469273.892570]&#160; [&amp;lt;ffffffffa0bc94bb&amp;gt;] ptlrpc_server_handle_request+0x23b/0xaa0 [ptlrpc]
Nov&#160; 3 19:08:40 oss kernel: [469273.892600]&#160; [&amp;lt;ffffffffa0bcd4a2&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
Nov&#160; 3 19:08:40 oss kernel: [469273.892609]&#160; [&amp;lt;ffffffff810b1131&amp;gt;] kthread+0xd1/0xe0
Nov&#160; 3 19:08:40 oss kernel: [469273.892616]&#160; [&amp;lt;ffffffff816a14f7&amp;gt;] ret_from_fork+0x77/0xb0
Nov&#160; 3 19:08:40 oss kernel: [469273.892624]&#160; [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
Nov&#160; 3 19:08:40 oss kernel: [469273.892627] LustreError: dumping log to /tmp/lustre-log.1541297320.85809
Nov&#160; 3 19:08:53 oss kernel: [469287.156081] LNet: Service thread pid 85694 was inactive for 301.11s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
Nov&#160; 3 19:08:53 oss kernel: [469287.207070] Pid: 85694, comm: ll_ost_io00_185 3.10.0-693.21.1.el7.20180508.x86_64.lustre2105 #1 SMP Mon Aug 27 23:04:41 UTC 2018
Nov&#160; 3 19:08:53 oss kernel: [469287.207071] Call Trace:
Nov&#160; 3 19:08:53 oss kernel: [469287.207100]&#160; [&amp;lt;ffffffffa0179085&amp;gt;] wait_transaction_locked+0x85/0xd0 [jbd2]
Nov&#160; 3 19:08:53 oss kernel: [469287.227995]&#160; [&amp;lt;ffffffffa0179368&amp;gt;] add_transaction_credits+0x268/0x2f0 [jbd2]
Nov&#160; 3 19:08:53 oss kernel: [469287.227999]&#160; [&amp;lt;ffffffffa01795e1&amp;gt;] start_this_handle+0x1a1/0x430 [jbd2]
Nov&#160; 3 19:08:53 oss kernel: [469287.228002]&#160; [&amp;lt;ffffffffa0179a93&amp;gt;] jbd2__journal_start+0xf3/0x1f0 [jbd2]
Nov&#160; 3 19:08:53 oss kernel: [469287.228010]&#160; [&amp;lt;ffffffffa14a7909&amp;gt;] __ldiskfs_journal_start_sb+0x69/0xe0 [ldiskfs]
Nov&#160; 3 19:08:53 oss kernel: [469287.228024]&#160; [&amp;lt;ffffffffa108dc8e&amp;gt;] osd_trans_start+0x1ae/0x460 [osd_ldiskfs]
Nov&#160; 3 19:08:53 oss kernel: [469287.228036]&#160; [&amp;lt;ffffffffa1117ce3&amp;gt;] ofd_commitrw_write+0xf03/0x1c90 [ofd]
Nov&#160; 3 19:08:53 oss kernel: [469287.228042]&#160; [&amp;lt;ffffffffa111b779&amp;gt;] ofd_commitrw+0x4c9/0xae0 [ofd]
Nov&#160; 3 19:08:53 oss kernel: [469287.228084]&#160; [&amp;lt;ffffffffa0c52510&amp;gt;] obd_commitrw+0x2f3/0x336 [ptlrpc]
Nov&#160; 3 19:08:53 oss kernel: [469287.228125]&#160; [&amp;lt;ffffffffa0c25145&amp;gt;] tgt_brw_write+0xfb5/0x1780 [ptlrpc]
Nov&#160; 3 19:08:53 oss kernel: [469287.228160]&#160; [&amp;lt;ffffffffa0c20eca&amp;gt;] tgt_request_handle+0x92a/0x1370 [ptlrpc]
Nov&#160; 3 19:08:53 oss kernel: [469287.228192]&#160; [&amp;lt;ffffffffa0bc94bb&amp;gt;] ptlrpc_server_handle_request+0x23b/0xaa0 [ptlrpc]
Nov&#160; 3 19:08:53 oss kernel: [469287.228224]&#160; [&amp;lt;ffffffffa0bcd4a2&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
Nov&#160; 3 19:08:53 oss kernel: [469287.228227]&#160; [&amp;lt;ffffffff810b1131&amp;gt;] kthread+0xd1/0xe0
Nov&#160; 3 19:08:53 oss kernel: [469287.228230]&#160; [&amp;lt;ffffffff816a14f7&amp;gt;] ret_from_fork+0x77/0xb0
Nov&#160; 3 19:08:53 oss kernel: [469287.228235]&#160; [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;If you need the complete crash dump, let me know and I will upload it.&lt;/p&gt;

&lt;p&gt;Thank You,&lt;/p&gt;

&lt;p&gt;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; Manish&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</description>
                <environment>CentOS Linux release 7.5.1804 (Core) &lt;br/&gt;
Kernel Version 3.10.0-693.21.1.el7.20180508.x86_64.lustre2105 &lt;br/&gt;
e2fsprogs-libs-1.44.3.wc1-0.el7.x86_64 &lt;br/&gt;
e2fsprogs-1.44.3.wc1-0.el7.x86_64 &lt;br/&gt;
e2fsprogs-static-1.44.3.wc1-0.el7.x86_64 &lt;br/&gt;
e2fsprogs-devel-1.44.3.wc1-0.el7.x86_64 &lt;br/&gt;
e2fsprogs-debuginfo-1.44.3.wc1-0.el7.x86_64</environment>
        <key id="53909">LU-11613</key>
            <summary>MDS and OSS locked up wait_transaction_locked+0x85/0xd0 [jbd2]</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="manishpatel">Manish</reporter>
                        <labels>
                    </labels>
                <created>Sun, 4 Nov 2018 06:48:43 +0000</created>
                <updated>Tue, 27 Aug 2019 17:55:10 +0000</updated>
                            <resolved>Tue, 27 Aug 2019 17:55:10 +0000</resolved>
                                    <version>Lustre 2.10.5</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>13</watches>
                                                                            <comments>
                            <comment id="236295" author="pjones" created="Sun, 4 Nov 2018 14:06:46 +0000"  >&lt;p&gt;Manish&lt;/p&gt;

&lt;p&gt;It sounds like this is a bug that is disruptive to production rather than one that takes the whole system completely out of service, so I have adjusted the severity accordingly.&lt;/p&gt;

&lt;p&gt;Alex&lt;/p&gt;

&lt;p&gt;Can you please investigate?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="236296" author="bzzz" created="Sun, 4 Nov 2018 15:09:15 +0000"  >&lt;p&gt;can you please attach backtraces for all the processes?&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="236298" author="manishpatel" created="Sun, 4 Nov 2018 19:19:59 +0000"  >&lt;p&gt;Hi Alex,&lt;/p&gt;

&lt;p&gt;I have uploaded the stack trace to the FTP site; the file name is &quot;foreach_bt.txt&quot;, under the ticket dir.&lt;/p&gt;

&lt;p&gt;Let me know if you are able to view that file.&lt;/p&gt;

&lt;p&gt;Thank You,&lt;/p&gt;

&lt;p&gt;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; Manish&lt;/p&gt;</comment>
                            <comment id="236344" author="manishpatel" created="Mon, 5 Nov 2018 19:29:08 +0000"  >&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;We hit this issue again on one of our other file systems, and the stack traces look similar. I have already uploaded the new stack traces to the FTP site; the file names are:&#160;&lt;/p&gt;

&lt;p&gt;foreach_bt_mds_11-05-2018.txt&lt;/p&gt;

&lt;p&gt;foreach_bt_oss_11-05-2018.txt&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Can we please raise the priority? This is now an ongoing issue with most of our Lustre file systems since the upgrade to 2.10.5.&lt;/p&gt;

&lt;p&gt;Thank You,&lt;/p&gt;

&lt;p&gt;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; Manish&lt;/p&gt;</comment>
                            <comment id="236351" author="bzzz" created="Mon, 5 Nov 2018 21:02:39 +0000"  >&lt;p&gt;thanks, it will take some time to analyse the traces.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="236430" author="bzzz" created="Tue, 6 Nov 2018 14:47:09 +0000"  >&lt;p&gt;Manish, please tell how many MDSs are used and whether LFSCK was running at that time?&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="236435" author="mhanafi" created="Tue, 6 Nov 2018 15:07:12 +0000"  >&lt;p&gt;There is only one mds and fsck was not running.&#160;&lt;/p&gt;</comment>
                            <comment id="236509" author="hakanson" created="Wed, 7 Nov 2018 03:07:08 +0000"  >&lt;p&gt;FYI, our lockups associated with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11465&quot; title=&quot;OSS/MDS deadlock in 2.10.5&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11465&quot;&gt;&lt;del&gt;LU-11465&lt;/del&gt;&lt;/a&gt; show a high load average on the affected server (MDS for our case), but the CPU utilization is &lt;em&gt;not&lt;/em&gt; high.&#160; In fact the MDS is quite responsive, CPUs are pretty much idle, but the high load average is due to all of the stuck &quot;D-state&quot; processes which are runnable, but deadlocked.&lt;/p&gt;

&lt;p&gt;Another difference from our &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11465&quot; title=&quot;OSS/MDS deadlock in 2.10.5&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11465&quot;&gt;&lt;del&gt;LU-11465&lt;/del&gt;&lt;/a&gt; situation is that rebooting the stuck server does not resolve the problem for us.&#160; We must force-unmount or reboot all the clients as well.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="236538" author="bzzz" created="Wed, 7 Nov 2018 10:36:08 +0000"  >&lt;p&gt;it would be very helpful if you can provide Lustre logs and/or dmesg messages. I think I understand the cause (and have patches in testing), but I still don&apos;t understand how we got into that situation..&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="236540" author="bzzz" created="Wed, 7 Nov 2018 11:17:40 +0000"  >&lt;p&gt;do you use (or used) project quota?&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="236544" author="bzzz" created="Wed, 7 Nov 2018 13:03:44 +0000"  >&lt;p&gt;AFAICS, some client sends chown/chgrp to the quota-enabled MDS which in turn needs to consult with OST on possibility to move file into another group. this file is being actively written so OST hits a deadlock (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10048&quot; title=&quot;osd-ldiskfs to truncate outside of main transaction&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10048&quot;&gt;&lt;del&gt;LU-10048&lt;/del&gt;&lt;/a&gt;). until that client is stopped it keeps resending RPC after reboot.&lt;/p&gt;

&lt;p&gt;Though I can&apos;t really explain why the OST hits it again and again; that&apos;s supposed to be a race-like condition.&lt;/p&gt;</comment>
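                            <!-- A minimal reproduction sketch of the chgrp-during-write scenario described
                                 in the comment above (an illustration, not taken from the ticket): it assumes
                                 group quota enforcement was enabled beforehand, e.g. via
                                 "lctl conf_param fsname.quota.ost=ug" on the MGS, and the mount point, file
                                 name and group name below are hypothetical.

                                 # from a Lustre client: keep a sustained write in flight
                                 dd if=/dev/zero of=/mnt/lustre/dir/bigfile bs=1M count=100000 &

                                 # while the write runs, move the file to another group; the MDS then
                                 # has to consult the OSTs about quota for the new group
                                 chgrp othergroup /mnt/lustre/dir/bigfile
                            -->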
                            <comment id="236555" author="manishpatel" created="Wed, 7 Nov 2018 15:06:57 +0000"  >&lt;p&gt;Hi Alex,&lt;/p&gt;

&lt;p&gt;We have never used project quota. I will share the dmesg logs with Oleg soon, and you can get more details from him.&lt;/p&gt;

&lt;p&gt;Thank You,&lt;/p&gt;

&lt;p&gt;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;Manish&lt;/p&gt;</comment>
                            <comment id="236562" author="manishpatel" created="Wed, 7 Nov 2018 16:28:20 +0000"  >&lt;p&gt;Hi Alex,&lt;/p&gt;

&lt;p&gt;Sorry for the confusion about which logs I can share; I am still getting familiar with the security limitations on sharing data. I have uploaded the dmesg logs to the FTP site under the ticket directory; the files are &quot;mds-vmcore-dmesg.txt&quot; and &quot;oss-vmcore-dmesg.txt&quot;.&lt;/p&gt;

&lt;p&gt;Thank You,&lt;/p&gt;

&lt;p&gt;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; Manish&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="236563" author="bzzz" created="Wed, 7 Nov 2018 16:33:29 +0000"  >&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
$ pwd
/scratch/ftp/uploads/LU-11613
[alexeyzh@ssh-2 LU-11613]$ ls -l
total 4448
-rw-r--r-- 1 nobody ftp&#160; 115858 Nov&#160; 4 16:42 foreach_backtrace.zip
-rw-r--r-- 1 nobody ftp 1724395 Nov&#160; 5 19:23 foreach_bt_mds_11-05-2018.txt
-rw-r--r-- 1 nobody ftp 1315475 Nov&#160; 5 19:23 foreach_bt_oss_11-05-2018.txt
-rw-r--r-- 1 nobody ftp 1389778 Nov&#160; 4 19:16 foreach_bt.txt&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Probably uploaded into a different dir?&lt;/p&gt;</comment>
                            <comment id="236565" author="manishpatel" created="Wed, 7 Nov 2018 16:50:07 +0000"  >&lt;p&gt;Hi Alex,&lt;/p&gt;

&lt;p&gt;No, it&apos;s in the same dir.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
ftp.whamcloud.com:/uploads/LU-11613&amp;gt; ls
-rw-r--r--&#160; &#160; 1 99 &#160; &#160; &#160; 50 &#160; &#160; &#160; &#160; 115858 Nov 04 16:42 foreach_backtrace.zip
-rw-r--r--&#160; &#160; 1 99 &#160; &#160; &#160; 50&#160; &#160; &#160; &#160; 1389778 Nov 04 19:16 foreach_bt.txt
-rw-r--r--&#160; &#160; 1 99 &#160; &#160; &#160; 50&#160; &#160; &#160; &#160; 1724395 Nov 05 19:23 foreach_bt_mds_11-05-2018.txt
-rw-r--r--&#160; &#160; 1 99 &#160; &#160; &#160; 50&#160; &#160; &#160; &#160; 1315475 Nov 05 19:23 foreach_bt_oss_11-05-2018.txt
-rw-r--r--&#160; &#160; 1 99 &#160; &#160; &#160; 50&#160; &#160; &#160; &#160; 1042487 Nov 07 16:25 mds-vmcore-dmesg.txt
-rw-r--r--&#160; &#160; 1 99 &#160; &#160; &#160; 50&#160; &#160; &#160; &#160; 1042487 Nov 07 16:26 oss-vmcore-dmesg.txt
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Please refresh the dir at your end.&lt;/p&gt;

&lt;p&gt;Thank You,&lt;/p&gt;

&lt;p&gt;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;Manish&lt;/p&gt;</comment>
                            <comment id="236652" author="bzzz" created="Thu, 8 Nov 2018 15:02:46 +0000"  >&lt;p&gt;did you upgrade Lustre recently? if yes, what the previous version was?&lt;br/&gt;
any idea about the application(s) the cluster is running? do you think chown/chgrp is a frequent operation?&lt;/p&gt;

&lt;p&gt;I&apos;m able to reproduce the issue and the fix seems to work, but we&apos;d like to understand why you started to experience the issue.&lt;/p&gt;</comment>
                            <comment id="236676" author="mhanafi" created="Thu, 8 Nov 2018 18:21:33 +0000"  >&lt;p&gt;Yes these filesystem were updated from 2.7 to 2.10.5 and that is when we started to see this issue.&lt;/p&gt;

&lt;p&gt;It is unlikely that there are frequent chown/chgrp operations.&lt;/p&gt;

&lt;p&gt;These are our larger file systems, and there are many applications running at the same time. It is difficult to isolate the issue to a single type of app.&lt;/p&gt;</comment>
                            <comment id="236706" author="dauchy" created="Fri, 9 Nov 2018 00:26:22 +0000"  >&lt;p&gt;Hit this problem at NOAA too, within 1 day of upgrading servers from 2.7 to 2.10.5 and clients from 2.10.4 to 2.10.5.&#160; See DDN Case #112165 for details.&lt;/p&gt;

&lt;p&gt;At NOAA, chgrp is commonly used (to keep ownership aligned with the top-level &quot;project&quot; directory, which we soon hope to replace with project quotas).&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="236709" author="hakanson" created="Fri, 9 Nov 2018 01:28:36 +0000"  >&lt;p&gt;We use chgrp for the same reasons as the NOAA poster above.&lt;/p&gt;

&lt;p&gt;While we wait for the fix to be released, can we avoid this lockup by disabling quota enforcement, or removing all our group quotas?&lt;/p&gt;

&lt;p&gt;Or must we disable the chown/chgrp commands cluster-wide?&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
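                            <!-- For reference, a sketch of the quota-enforcement toggle being asked about,
                                 using the lctl conf_param syntax (run on the MGS; "fsname" is a placeholder):

                                 # disable block-quota enforcement on the OSTs
                                 lctl conf_param fsname.quota.ost=none

                                 # re-enable user/group enforcement later
                                 lctl conf_param fsname.quota.ost=ug
                            -->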
                            <comment id="236713" author="cwhite_ddn" created="Fri, 9 Nov 2018 02:40:34 +0000"  >&lt;p&gt;Adding MDS call traces from DDN 112165&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Nov  8 17:18:50 lfs-mds-1-1 kernel: Lustre: lfs1-MDT0000: Connection restored to 4de02ddb-11dd-0c3b-e5a4-6b3f42554a8e (at 10.179.38.35@o2ib)
Nov  8 18:58:23 lfs-mds-1-1 kernel: Lustre: lfs1-MDT0000: Connection restored to a81b824f-2064-b0a5-e927-1bb9bd32bb32 (at 10.179.126.13@o2ib)
Nov  8 19:11:19 lfs-mds-1-1 kernel: INFO: task jbd2/dm-21-8:41769 blocked for more than 120 seconds.
Nov  8 19:11:19 lfs-mds-1-1 kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Nov  8 19:11:19 lfs-mds-1-1 kernel: jbd2/dm-21-8    D ffff922a486a9fa0     0 41769      2 0x00000000
Nov  8 19:11:19 lfs-mds-1-1 kernel: Call Trace:
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffaeb14029&amp;gt;] schedule+0x29/0x70
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc04a82ac&amp;gt;] jbd2_journal_commit_transaction+0x23c/0x19b0 [jbd2]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4d52ce&amp;gt;] ? account_entity_dequeue+0xae/0xd0
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4d8e4c&amp;gt;] ? dequeue_entity+0x11c/0x5e0
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4d2765&amp;gt;] ? sched_clock_cpu+0x85/0xc0
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae42959e&amp;gt;] ? __switch_to+0xce/0x580
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4bc610&amp;gt;] ? wake_up_atomic_t+0x30/0x30
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffaeb139fc&amp;gt;] ? __schedule+0x41c/0xa20
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4a3eae&amp;gt;] ? try_to_del_timer_sync+0x5e/0x90
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc04aeab9&amp;gt;] kjournald2+0xc9/0x260 [jbd2]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4bc610&amp;gt;] ? wake_up_atomic_t+0x30/0x30
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc04ae9f0&amp;gt;] ? commit_timeout+0x10/0x10 [jbd2]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4bb621&amp;gt;] kthread+0xd1/0xe0
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4bb550&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffaeb205f7&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4bb550&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov  8 19:11:19 lfs-mds-1-1 kernel: INFO: task mdt00_002:41819 blocked for more than 120 seconds.
Nov  8 19:11:19 lfs-mds-1-1 kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Nov  8 19:11:19 lfs-mds-1-1 kernel: mdt00_002       D ffff922a48a9bf40     0 41819      2 0x00000000
Nov  8 19:11:19 lfs-mds-1-1 kernel: Call Trace:
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0f05240&amp;gt;] ? ldlm_pool_add+0x80/0xf0 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffaeb14029&amp;gt;] schedule+0x29/0x70
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc04a5085&amp;gt;] wait_transaction_locked+0x85/0xd0 [jbd2]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4bc610&amp;gt;] ? wake_up_atomic_t+0x30/0x30
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc04a5368&amp;gt;] add_transaction_credits+0x268/0x2f0 [jbd2]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc04a55e1&amp;gt;] start_this_handle+0x1a1/0x430 [jbd2]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc136a06a&amp;gt;] ? osd_declare_write+0x1ea/0x450 [osd_ldiskfs]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae5f7bc2&amp;gt;] ? kmem_cache_alloc+0x1c2/0x1f0
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc04a5a93&amp;gt;] jbd2__journal_start+0xf3/0x1f0 [jbd2]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc134bcde&amp;gt;] ? osd_trans_start+0x1ae/0x460 [osd_ldiskfs]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc1225179&amp;gt;] __ldiskfs_journal_start_sb+0x69/0xe0 [ldiskfs]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc134bcde&amp;gt;] osd_trans_start+0x1ae/0x460 [osd_ldiskfs]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc159b5d7&amp;gt;] mdt_empty_transno+0xf7/0x840 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc159e70e&amp;gt;] mdt_mfd_open+0x8de/0xe70 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc1576042&amp;gt;] ? mdt_pack_acl2body+0x1b2/0x800 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc159f21b&amp;gt;] mdt_finish_open+0x57b/0x690 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc15a0af8&amp;gt;] mdt_reint_open+0x17c8/0x3190 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0f1fef7&amp;gt;] ? lustre_msg_add_version+0x27/0xa0 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0d5ca5e&amp;gt;] ? lu_ucred+0x1e/0x30 [obdclass]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc1585ad5&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc1586401&amp;gt;] ? mdt_root_squash+0x21/0x430 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc1596213&amp;gt;] mdt_reint_rec+0x83/0x210 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc157736b&amp;gt;] mdt_reint_internal+0x5fb/0x9c0 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc1577892&amp;gt;] mdt_intent_reint+0x162/0x480 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc1582721&amp;gt;] mdt_intent_policy+0x441/0xc70 [mdt]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0ed71db&amp;gt;] ? ldlm_resource_get+0xab/0xa60 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0ed02ba&amp;gt;] ldlm_lock_enqueue+0x38a/0x980 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0ef9cf3&amp;gt;] ldlm_handle_enqueue0+0x9d3/0x16a0 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0f21fa0&amp;gt;] ? lustre_swab_ldlm_lock_desc+0x30/0x30 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0f824e2&amp;gt;] tgt_enqueue+0x62/0x210 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0f8641a&amp;gt;] tgt_request_handle+0x92a/0x1370 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0f2bfeb&amp;gt;] ptlrpc_server_handle_request+0x23b/0xaa0 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0f28618&amp;gt;] ? ptlrpc_wait_event+0x98/0x340 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4cf682&amp;gt;] ? default_wake_function+0x12/0x20
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4c52ab&amp;gt;] ? __wake_up_common+0x5b/0x90
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0f2f732&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffc0f2eca0&amp;gt;] ? ptlrpc_register_service+0xe30/0xe30 [ptlrpc]
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4bb621&amp;gt;] kthread+0xd1/0xe0
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4bb550&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffaeb205f7&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
Nov  8 19:11:19 lfs-mds-1-1 kernel:  [&amp;lt;ffffffffae4bb550&amp;gt;] ? insert_kthread_work+0x40/0x40
Nov  8 19:11:19 lfs-mds-1-1 kernel: INFO: task mdt02_000:41823 blocked for more than 120 seconds.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="238230" author="pjones" created="Sun, 9 Dec 2018 01:30:05 +0000"  >&lt;p&gt;The revert of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5152&quot; title=&quot;Can&amp;#39;t enforce block quota when unprivileged user change group&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5152&quot;&gt;&lt;del&gt;LU-5152&lt;/del&gt;&lt;/a&gt; in 2.10.6 should remove the possibility to hit this issue&lt;/p&gt;</comment>
                            <comment id="238263" author="bzzz" created="Mon, 10 Dec 2018 08:12:39 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=cwhite_ddn&quot; class=&quot;user-hover&quot; rel=&quot;cwhite_ddn&quot;&gt;cwhite_ddn&lt;/a&gt; please add all remaining stack traces if possible&lt;/p&gt;</comment>
                            <comment id="238680" author="pjones" created="Mon, 17 Dec 2018 14:04:43 +0000"  >&lt;p&gt;Let&apos;s not conflate similar (or even identical) issues from different sites with this support request from NASA. &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=manishpatel&quot; class=&quot;user-hover&quot; rel=&quot;manishpatel&quot;&gt;manishpatel&lt;/a&gt; has this issue been seen with 2.10.6?&lt;/p&gt;</comment>
                            <comment id="238687" author="manishpatel" created="Mon, 17 Dec 2018 16:21:55 +0000"  >&lt;p&gt;Hi Peter,&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;We are still on 2.10.5 and are working on a few other issues to address along with this one, so we may not move to 2.10.6 and may instead end up on a newer release, depending on the downtime slots we get. We also have a new build ready without the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5152&quot; title=&quot;Can&amp;#39;t enforce block quota when unprivileged user change group&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5152&quot;&gt;&lt;del&gt;LU-5152&lt;/del&gt;&lt;/a&gt; patch, but it all depends on when we get a chance to deploy it; if that happens, we will update and let you know whether we still experience this issue.&lt;/p&gt;

&lt;p&gt;Thank You,&lt;/p&gt;

&lt;p&gt;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;Manish&lt;/p&gt;</comment>
                            <comment id="253705" author="adilger" created="Tue, 27 Aug 2019 17:55:10 +0000"  >&lt;p&gt;The patch &lt;a href=&quot;https://review.whamcloud.com/31293&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/31293&lt;/a&gt; &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10048&quot; title=&quot;osd-ldiskfs to truncate outside of main transaction&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10048&quot;&gt;&lt;del&gt;LU-10048&lt;/del&gt;&lt;/a&gt; ofd: take local locks within transaction&lt;/tt&gt;&quot; landed.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="48520">LU-10048</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is duplicated by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="53484">LU-11465</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i005nz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10021"><![CDATA[2]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>