<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:19:33 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1772] Test failure on test suite racer, subtest test_1</title>
                <link>https://jira.whamcloud.com/browse/LU-1772</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for sarah &amp;lt;sarah@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/9d777c2e-e9a0-11e1-881a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/9d777c2e-e9a0-11e1-881a-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_1 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;There are some hanging threads on client side&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;14:14:58:Lustre: DEBUG MARKER: == racer test 1: racer on clients: client-26vm5,client-26vm6.lab.whamcloud.com DURATION=900 == 14:14:56 (1345238096)
14:14:59:Lustre: DEBUG MARKER: PATH=/usr/lib64/lustre/tests:/usr/lib/lustre/tests:/usr/lib64/lustre/tests:/opt/iozone/bin:/opt/iozone/bin:/usr/lib64/lustre/tests/mpi:/usr/lib64/lustre/tests/racer:/usr/lib64/lustre/../lustre-iokit/sgpdd-survey:/usr/lib64/lustre/tests:/usr/lib64/lustre/u
14:14:59:Lustre: DEBUG MARKER: DURATION=900 /usr/lib64/lustre/tests/racer/racer.sh /mnt/lustre2/racer 
14:15:08:Lustre: DEBUG MARKER: DURATION=900 /usr/lib64/lustre/tests/racer/racer.sh /mnt/lustre/racer 
14:18:31:LustreError: 11-0: an error occurred while communicating with 10.10.4.151@tcp. The ost_write operation failed with -2
14:18:31:LustreError: 11-0: an error occurred while communicating with 10.10.4.151@tcp. The ost_write operation failed with -2
14:18:32:LustreError: 9716:0:(vvp_io.c:1039:vvp_io_commit_write()) Write page 34217 of inode ffff880078a36178 failed -2
14:18:32:Lustre: lustre-MDT0000-mdc-ffff880074b62000: Connection to lustre-MDT0000 (at 10.10.4.150@tcp) was lost; in progress operations using this service will wait for recovery to complete
14:18:32:LustreError: 167-0: This client was evicted by lustre-MDT0000; in progress operations using this service will fail.
14:18:32:LustreError: 9716:0:(file.c:155:ll_close_inode_openhandle()) inode 144115205272502283 mdc close failed: rc = -5
14:18:32:LustreError: 23544:0:(ldlm_resource.c:761:ldlm_resource_complain()) Namespace lustre-MDT0000-mdc-ffff880074b62000 resource refcount nonzero (1) after lock cleanup; forcing cleanup.
14:18:34:LustreError: 23544:0:(ldlm_resource.c:767:ldlm_resource_complain()) Resource: ffff88007a1c2480 (8589935617/1747/0/0) (rc: 1)
14:18:34:Lustre: lustre-MDT0000-mdc-ffff880074b62000: Connection restored to lustre-MDT0000 (at 10.10.4.150@tcp)
14:18:36:LustreError: 11-0: an error occurred while communicating with 10.10.4.151@tcp. The obd_ping operation failed with -107
14:18:36:LustreError: Skipped 1 previous similar message
14:18:36:Lustre: lustre-OST0001-osc-ffff880074b62000: Connection to lustre-OST0001 (at 10.10.4.151@tcp) was lost; in progress operations using this service will wait for recovery to complete
14:18:36:LustreError: 167-0: This client was evicted by lustre-OST0001; in progress operations using this service will fail.
14:19:14:INFO: task ldlm_bl_02:8789 blocked for more than 120 seconds.
14:19:14:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:14:ldlm_bl_02    D 0000000000000000     0  8789      2 0x00000080
14:19:15: ffff8800763fdd50 0000000000000046 0000000100000000 0000000000000000
14:19:15: ffff8800763fdce0 ffffffffa04fb262 ffff8800763fdd40 0000000000000001
14:19:15: ffff8800377e4638 ffff8800763fdfd8 000000000000fb88 ffff8800377e4638
14:19:15:Call Trace:
14:19:15: [&amp;lt;ffffffffa04fb262&amp;gt;] ? cfs_hash_bd_add_locked+0x62/0x90 [libcfs]
14:19:15: [&amp;lt;ffffffffa04fb0c4&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
14:19:15: [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
14:19:15: [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
14:19:15: [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
14:19:15: [&amp;lt;ffffffffa090224a&amp;gt;] osc_ldlm_blocking_ast+0x7a/0x380 [osc]
14:19:15: [&amp;lt;ffffffffa04f6521&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
14:19:15: [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
14:19:15: [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
14:19:17: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
14:19:17: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:17: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
14:19:17: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:17: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:17: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
14:19:17:INFO: task ldlm_bl_03:8790 blocked for more than 120 seconds.
14:19:17:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:17:ldlm_bl_03    D 0000000000000000     0  8790      2 0x00000080
14:19:17: ffff880052c8dd50 0000000000000046 0000000100000000 0000000000000000
14:19:18: ffff880052c8dce0 ffffffffa04fb262 ffff880052c8dd40 0000000000000001
14:19:18: ffff88007a4545f8 ffff880052c8dfd8 000000000000fb88 ffff88007a4545f8
14:19:18:Call Trace:
14:19:18: [&amp;lt;ffffffffa04fb262&amp;gt;] ? cfs_hash_bd_add_locked+0x62/0x90 [libcfs]
14:19:18: [&amp;lt;ffffffffa04fb0c4&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
14:19:18: [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
14:19:19: [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
14:19:19: [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
14:19:19: [&amp;lt;ffffffffa090224a&amp;gt;] osc_ldlm_blocking_ast+0x7a/0x380 [osc]
14:19:19: [&amp;lt;ffffffffa04f6521&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
14:19:19: [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
14:19:19: [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
14:19:19: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
14:19:19: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:19: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
14:19:19: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:19: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:19: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
14:19:19:INFO: task ldlm_bl_05:9660 blocked for more than 120 seconds.
14:19:19:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:19:ldlm_bl_05    D 0000000000000000     0  9660      2 0x00000080
14:19:19: ffff88007adbbd50 0000000000000046 0000000100000000 0000000000000000
14:19:20: ffff88007adbbce0 ffffffffa04fb262 ffff88007adbbd40 0000000000000001
14:19:20: ffff880075433098 ffff88007adbbfd8 000000000000fb88 ffff880075433098
14:19:20:Call Trace:
14:19:21: [&amp;lt;ffffffffa04fb262&amp;gt;] ? cfs_hash_bd_add_locked+0x62/0x90 [libcfs]
14:19:21: [&amp;lt;ffffffffa04fb0c4&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
14:19:21: [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
14:19:21: [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
14:19:21: [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
14:19:21: [&amp;lt;ffffffffa090224a&amp;gt;] osc_ldlm_blocking_ast+0x7a/0x380 [osc]
14:19:21: [&amp;lt;ffffffffa04f6521&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
14:19:22: [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
14:19:22: [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
14:19:22: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
14:19:22: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:22: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
14:19:22: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:22: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:22: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
14:19:22:INFO: task ldlm_bl_08:9663 blocked for more than 120 seconds.
14:19:22:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:23:ldlm_bl_08    D 0000000000000000     0  9663      2 0x00000080
14:19:23: ffff88007ade1d50 0000000000000046 0000000100000000 0000000000000000
14:19:24: ffff88007ade1ce0 ffffffffa04fb262 ffff88007ade1d40 0000000000000001
14:19:24: ffff88007a1ce5f8 ffff88007ade1fd8 000000000000fb88 ffff88007a1ce5f8
14:19:24:Call Trace:
14:19:24: [&amp;lt;ffffffffa04fb262&amp;gt;] ? cfs_hash_bd_add_locked+0x62/0x90 [libcfs]
14:19:24: [&amp;lt;ffffffffa04fb0c4&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
14:19:24: [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
14:19:24: [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
14:19:24: [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
14:19:24: [&amp;lt;ffffffffa090224a&amp;gt;] osc_ldlm_blocking_ast+0x7a/0x380 [osc]
14:19:24: [&amp;lt;ffffffffa04f6521&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
14:19:24: [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
14:19:24: [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
14:19:25: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
14:19:25: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:25: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
14:19:27: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:27: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:27: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
14:19:27:INFO: task ldlm_bl_12:9667 blocked for more than 120 seconds.
14:19:27:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:27:ldlm_bl_12    D 0000000000000000     0  9667      2 0x00000080
14:19:27: ffff880053011ad8 0000000000000046 000000000027a41e ffff880078a56030
14:19:27: ffff880078a57000 ffff880053011a90 ffffffff81039678 ffff8800ffffffff
14:19:27: ffff88005300fab8 ffff880053011fd8 000000000000fb88 ffff88005300fab8
14:19:28:Call Trace:
14:19:28: [&amp;lt;ffffffff81039678&amp;gt;] ? pvclock_clocksource_read+0x58/0xd0
14:19:28: [&amp;lt;ffffffff814ffec5&amp;gt;] rwsem_down_failed_common+0x95/0x1d0
14:19:28: [&amp;lt;ffffffff81500056&amp;gt;] rwsem_down_read_failed+0x26/0x30
14:19:28: [&amp;lt;ffffffff8127e664&amp;gt;] call_rwsem_down_read_failed+0x14/0x30
14:19:28: [&amp;lt;ffffffff814ff554&amp;gt;] ? down_read+0x24/0x30
14:19:28: [&amp;lt;ffffffffa09a4f04&amp;gt;] lov_lsm_addref+0x34/0x150 [lov]
14:19:28: [&amp;lt;ffffffffa09a56e5&amp;gt;] lov_io_init+0x75/0x1c0 [lov]
14:19:28: [&amp;lt;ffffffffa0659378&amp;gt;] cl_io_init0+0x98/0x160 [obdclass]
14:19:29: [&amp;lt;ffffffffa04fb0c4&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
14:19:29: [&amp;lt;ffffffffa065c264&amp;gt;] cl_io_init+0x64/0x100 [obdclass]
14:19:29: [&amp;lt;ffffffffa0653fa4&amp;gt;] cl_lock_discard_pages+0x64/0x1f0 [obdclass]
14:19:30: [&amp;lt;ffffffffa09004e0&amp;gt;] osc_lock_flush+0x110/0x200 [osc]
14:19:30: [&amp;lt;ffffffffa0900629&amp;gt;] osc_lock_cancel+0x59/0x1a0 [osc]
14:19:30: [&amp;lt;ffffffffa0651dc5&amp;gt;] cl_lock_cancel0+0x75/0x160 [obdclass]
14:19:30: [&amp;lt;ffffffffa0652a2b&amp;gt;] cl_lock_cancel+0x13b/0x140 [obdclass]
14:19:30: [&amp;lt;ffffffffa090230a&amp;gt;] osc_ldlm_blocking_ast+0x13a/0x380 [osc]
14:19:30: [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
14:19:31: [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
14:19:31: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
14:19:31: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:31: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
14:19:31: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:31: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:31: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
14:19:31:INFO: task ldlm_bl_13:9668 blocked for more than 120 seconds.
14:19:32:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:32:ldlm_bl_13    D 0000000000000000     0  9668      2 0x00000080
14:19:32: ffff880053015d50 0000000000000046 0000000100000000 0000000000000000
14:19:32: ffff880053015ce0 ffffffffa04fb262 ffff880053015d40 0000000000000001
14:19:32: ffff88005300f058 ffff880053015fd8 000000000000fb88 ffff88005300f058
14:19:32:Call Trace:
14:19:33: [&amp;lt;ffffffffa04fb262&amp;gt;] ? cfs_hash_bd_add_locked+0x62/0x90 [libcfs]
14:19:33: [&amp;lt;ffffffffa04fb0c4&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
14:19:33: [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
14:19:33: [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
14:19:33: [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
14:19:34: [&amp;lt;ffffffffa090224a&amp;gt;] osc_ldlm_blocking_ast+0x7a/0x380 [osc]
14:19:34: [&amp;lt;ffffffffa04f6521&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
14:19:34: [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
14:19:34: [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
14:19:34: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
14:19:34: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:35: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
14:19:35: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:35: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:35: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
14:19:35:INFO: task ldlm_bl_15:9676 blocked for more than 120 seconds.
14:19:35:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:35:ldlm_bl_15    D 0000000000000000     0  9676      2 0x00000080
14:19:36: ffff880053155d50 0000000000000046 0000000100000000 0000000000000000
14:19:36: ffff880053155ce0 ffffffffa04fb262 ffff880053155d40 0000000000000001
14:19:36: ffff880053057af8 ffff880053155fd8 000000000000fb88 ffff880053057af8
14:19:36:Call Trace:
14:19:36: [&amp;lt;ffffffffa04fb262&amp;gt;] ? cfs_hash_bd_add_locked+0x62/0x90 [libcfs]
14:19:36: [&amp;lt;ffffffffa04fb0c4&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
14:19:36: [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
14:19:36: [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
14:19:36: [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
14:19:36: [&amp;lt;ffffffffa090224a&amp;gt;] osc_ldlm_blocking_ast+0x7a/0x380 [osc]
14:19:37: [&amp;lt;ffffffffa04f6521&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
14:19:37: [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
14:19:37: [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
14:19:37: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
14:19:37: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:37: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
14:19:38: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:38: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:38: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
14:19:39:INFO: task ldlm_bl_18:10487 blocked for more than 120 seconds.
14:19:39:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:39:ldlm_bl_18    D 0000000000000000     0 10487      2 0x00000080
14:19:39: ffff880052cffd50 0000000000000046 0000000100000000 0000000000000000
14:19:39: ffff880052cffce0 ffffffffa04fb262 ffff880052cffd40 0000000000000001
14:19:39: ffff8800532c1098 ffff880052cfffd8 000000000000fb88 ffff8800532c1098
14:19:39:Call Trace:
14:19:39: [&amp;lt;ffffffffa04fb262&amp;gt;] ? cfs_hash_bd_add_locked+0x62/0x90 [libcfs]
14:19:40: [&amp;lt;ffffffffa04fb0c4&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
14:19:40: [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
14:19:40: [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
14:19:41: [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
14:19:41: [&amp;lt;ffffffffa090224a&amp;gt;] osc_ldlm_blocking_ast+0x7a/0x380 [osc]
14:19:41: [&amp;lt;ffffffffa04f6521&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
14:19:41: [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
14:19:41: [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
14:19:41: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
14:19:41: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:41: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
14:19:42: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:42: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:42: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
14:19:42:INFO: task ldlm_bl_23:10492 blocked for more than 120 seconds.
14:19:42:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:42:ldlm_bl_23    D 0000000000000000     0 10492      2 0x00000080
14:19:43: ffff880053d7bd50 0000000000000046 0000000100000000 0000000000000000
14:19:43: ffff880053d7bce0 ffffffffa04fb262 ffff880053d7bd40 0000000000000001
14:19:43: ffff880053d75098 ffff880053d7bfd8 000000000000fb88 ffff880053d75098
14:19:43:Call Trace:
14:19:43: [&amp;lt;ffffffffa04fb262&amp;gt;] ? cfs_hash_bd_add_locked+0x62/0x90 [libcfs]
14:19:43: [&amp;lt;ffffffffa04fb0c4&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
14:19:43: [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
14:19:43: [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
14:19:44: [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
14:19:44: [&amp;lt;ffffffffa090224a&amp;gt;] osc_ldlm_blocking_ast+0x7a/0x380 [osc]
14:19:44: [&amp;lt;ffffffffa04f6521&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
14:19:44: [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
14:19:45: [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
14:19:45: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
14:19:45: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:45: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
14:19:45: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:45: [&amp;lt;ffffffffa077fe90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3d0 [ptlrpc]
14:19:47: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
14:19:47:INFO: task ls:21953 blocked for more than 120 seconds.
14:19:47:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
14:19:47:ls            D 0000000000000000     0 21953   9441 0x00000080
14:19:47: ffff88004288bae8 0000000000000082 000000000000009e 0020000000000080
14:19:47: 502eb4be00000000 00000000000ee1f8 000055c100000000 00000a2d00000000
14:19:47: ffff88007920faf8 ffff88004288bfd8 000000000000fb88 ffff88007920faf8
14:19:48:Call Trace:
14:19:48: [&amp;lt;ffffffffa04f5e63&amp;gt;] ? libcfs_debug_vmsg2+0x4e3/0xb60 [libcfs]
14:19:48: [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
14:19:48: [&amp;lt;ffffffff81193788&amp;gt;] ? __d_lookup+0xd8/0x150
14:19:48: [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
14:19:48: [&amp;lt;ffffffff8118969b&amp;gt;] do_lookup+0x11b/0x230
14:19:48: [&amp;lt;ffffffff81189abd&amp;gt;] __link_path_walk+0x20d/0x1030
14:19:48: [&amp;lt;ffffffff81097e2f&amp;gt;] ? up+0x2f/0x50
14:19:48: [&amp;lt;ffffffffa0a55dbc&amp;gt;] ? ll_follow_link+0x1ec/0x260 [lustre]
14:19:48: [&amp;lt;ffffffff8118a647&amp;gt;] __link_path_walk+0xd97/0x1030
14:19:48: [&amp;lt;ffffffff8118ab6a&amp;gt;] path_walk+0x6a/0xe0
14:19:48: [&amp;lt;ffffffff8118ad3b&amp;gt;] do_path_lookup+0x5b/0xa0
14:19:50: [&amp;lt;ffffffff8117c780&amp;gt;] ? get_empty_filp+0xa0/0x180
14:19:50: [&amp;lt;ffffffff8118bc6b&amp;gt;] do_filp_open+0xfb/0xd60
14:19:50: [&amp;lt;ffffffff8119a460&amp;gt;] ? mntput_no_expire+0x30/0x110
14:19:50: [&amp;lt;ffffffff811982b2&amp;gt;] ? alloc_fd+0x92/0x160
14:19:50: [&amp;lt;ffffffff81178769&amp;gt;] do_sys_open+0x69/0x140
14:19:51: [&amp;lt;ffffffff81178880&amp;gt;] sys_open+0x20/0x30
14:19:51: [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>server and client: lustre-master-tag2.2.93 RHEL6</environment>
        <key id="15532">LU-1772</key>
            <summary>Test failure on test suite racer, subtest test_1</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Mon, 20 Aug 2012 13:35:14 +0000</created>
                <updated>Sun, 2 Sep 2012 15:06:36 +0000</updated>
                            <resolved>Sun, 2 Sep 2012 15:06:36 +0000</resolved>
                                    <version>Lustre 2.3.0</version>
                                    <fixVersion>Lustre 2.3.0</fixVersion>
                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="43519" author="pjones" created="Mon, 20 Aug 2012 19:19:53 +0000"  >&lt;p&gt;Lai&lt;/p&gt;

&lt;p&gt;Could you please look into this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="43547" author="laisiyao" created="Tue, 21 Aug 2012 07:30:47 +0000"  >&lt;p&gt;The logs show that both the MDT and the OST time out on lock cancel, which finally causes the client eviction. But the client log is not complete, so I can&apos;t see why the lock cancel times out there. I&apos;ll read more of the logs and try to reproduce it.&lt;/p&gt;</comment>
                            <comment id="43695" author="laisiyao" created="Thu, 23 Aug 2012 11:39:17 +0000"  >&lt;p&gt;The backtraces below show that there may be a deadlock, but I haven&apos;t figured it out yet; I&apos;ll read more of the CLIO code.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;
ldlm_bl_12    D 0000000000000000     0  9667      2 0x00000080
 ffff880053011ad8 0000000000000046 000000000027a41e ffff880078a56030
 ffff880078a57000 ffff880053011a90 ffffffff81039678 ffff8800ffffffff
 ffff88005300fab8 ffff880053011fd8 000000000000fb88 ffff88005300fab8
Call Trace:
 [&amp;lt;ffffffffa09a4f04&amp;gt;] lov_lsm_addref+0x34/0x150 [lov]
 [&amp;lt;ffffffffa09a56e5&amp;gt;] lov_io_init+0x75/0x1c0 [lov]
 [&amp;lt;ffffffffa0659378&amp;gt;] cl_io_init0+0x98/0x160 [obdclass]
 [&amp;lt;ffffffffa065c264&amp;gt;] cl_io_init+0x64/0x100 [obdclass]
 [&amp;lt;ffffffffa0653fa4&amp;gt;] cl_lock_discard_pages+0x64/0x1f0 [obdclass]
 [&amp;lt;ffffffffa09004e0&amp;gt;] osc_lock_flush+0x110/0x200 [osc]
 [&amp;lt;ffffffffa0900629&amp;gt;] osc_lock_cancel+0x59/0x1a0 [osc]
 [&amp;lt;ffffffffa0651dc5&amp;gt;] cl_lock_cancel0+0x75/0x160 [obdclass]
 [&amp;lt;ffffffffa0652a2b&amp;gt;] cl_lock_cancel+0x13b/0x140 [obdclass]
 [&amp;lt;ffffffffa090230a&amp;gt;] osc_ldlm_blocking_ast+0x13a/0x380 [osc]
 [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
 [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
 [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
mv            D 0000000000000000     0 23117      1 0x00000080
 ffff88004078d928 0000000000000086 00000000ffffffff 000000000078d908
 ffff880000000030 ffff88004078d9a8 ffff88004078d938 ffffffff81039678
 ffff880078895058 ffff88004078dfd8 000000000000fb88 ffff880078895058
Call Trace:
 [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
 [&amp;lt;ffffffffa0657b76&amp;gt;] cl_lock_hold_mutex+0x96/0x6c0 [obdclass]
 [&amp;lt;ffffffffa06581d3&amp;gt;] cl_lock_hold+0x33/0x100 [obdclass]
 [&amp;lt;ffffffffa09abc9d&amp;gt;] lov_sublock_alloc+0x11d/0x470 [lov]
 [&amp;lt;ffffffffa09acc04&amp;gt;] lov_lock_init_raid0+0x3e4/0xed0 [lov]
 [&amp;lt;ffffffffa09a5338&amp;gt;] lov_lock_init+0x68/0xe0 [lov]
 [&amp;lt;ffffffffa0657e7c&amp;gt;] cl_lock_hold_mutex+0x39c/0x6c0 [obdclass]
 [&amp;lt;ffffffffa0658302&amp;gt;] cl_lock_request+0x62/0x280 [obdclass]
 [&amp;lt;ffffffffa0a6a6bb&amp;gt;] cl_glimpse_lock+0x17b/0x4a0 [lustre]
 [&amp;lt;ffffffffa0a6af47&amp;gt;] cl_glimpse_size0+0x187/0x190 [lustre]
 [&amp;lt;ffffffffa0a27f62&amp;gt;] ll_inode_revalidate_it+0xf2/0x1c0 [lustre]
 [&amp;lt;ffffffffa0a28079&amp;gt;] ll_getattr_it+0x49/0x170 [lustre]
 [&amp;lt;ffffffffa0a281d7&amp;gt;] ll_getattr+0x37/0x40 [lustre]
 [&amp;lt;ffffffff81180571&amp;gt;] vfs_getattr+0x51/0x80
 [&amp;lt;ffffffff81180600&amp;gt;] vfs_fstatat+0x60/0x80
 [&amp;lt;ffffffff8118068e&amp;gt;] vfs_lstat+0x1e/0x20
 [&amp;lt;ffffffff811806b4&amp;gt;] sys_newlstat+0x24/0x50
 [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
mkdir         D 0000000000000000     0 23111      1 0x00000080
 ffff8800407c7788 0000000000000086 0000000000000099 0001000000010000
 502eb4bf00000000 000000000008aac5 00005a4700000000 000004ef00000000
 ffff8800786e0638 ffff8800407c7fd8 000000000000fb88 ffff8800786e0638
Call Trace:
 [&amp;lt;ffffffffa0649ff8&amp;gt;] cl_conf_set+0x58/0x100 [obdclass]
 [&amp;lt;ffffffffa0a6b6bc&amp;gt;] cl_file_inode_init+0xfc/0x360 [lustre]
 [&amp;lt;ffffffffa0a38e92&amp;gt;] ll_update_inode+0x112/0xe60 [lustre]
 [&amp;lt;ffffffffa0a50e73&amp;gt;] ll_iget+0x1e3/0x2a0 [lustre]
 [&amp;lt;ffffffffa0a3a0c1&amp;gt;] ll_prep_inode+0x4e1/0xbf0 [lustre]
 [&amp;lt;ffffffffa0a4ef77&amp;gt;] ll_lookup_it_finish+0x107/0x9d0 [lustre]
 [&amp;lt;ffffffffa0a4fc43&amp;gt;] ll_lookup_it+0x403/0xbc0 [lustre]
 [&amp;lt;ffffffffa0a5146c&amp;gt;] ll_lookup_nd+0x8c/0x400 [lustre]
 [&amp;lt;ffffffff81189725&amp;gt;] do_lookup+0x1a5/0x230
 [&amp;lt;ffffffff81189fe4&amp;gt;] __link_path_walk+0x734/0x1030
 [&amp;lt;ffffffff8118ab6a&amp;gt;] path_walk+0x6a/0xe0
 [&amp;lt;ffffffff8118ad3b&amp;gt;] do_path_lookup+0x5b/0xa0
 [&amp;lt;ffffffff8118b9a7&amp;gt;] user_path_at+0x57/0xa0
 [&amp;lt;ffffffff811794e7&amp;gt;] sys_chdir+0x27/0x90
 [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
cp            D 0000000000000000     0 23166      1 0x00000080
 ffff8800407f7b80 0000000000000082 ffffffffa0810740 ffff88007a1c2480
 0000000000000000 ffff8800407f7c18 ffffffffa0767876 1d7145a988afca52
 ffff8800774f45f8 ffff8800407f7fd8 000000000000fb88 ffff8800774f45f8
Call Trace:
 [&amp;lt;ffffffffa09a4f04&amp;gt;] lov_lsm_addref+0x34/0x150 [lov]
 [&amp;lt;ffffffffa09a56e5&amp;gt;] lov_io_init+0x75/0x1c0 [lov]
 [&amp;lt;ffffffffa0659378&amp;gt;] cl_io_init0+0x98/0x160 [obdclass]
 [&amp;lt;ffffffffa065c264&amp;gt;] cl_io_init+0x64/0x100 [obdclass]
 [&amp;lt;ffffffffa0a6ae3d&amp;gt;] cl_glimpse_size0+0x7d/0x190 [lustre]
 [&amp;lt;ffffffffa0a27f62&amp;gt;] ll_inode_revalidate_it+0xf2/0x1c0 [lustre]
 [&amp;lt;ffffffffa0a28079&amp;gt;] ll_getattr_it+0x49/0x170 [lustre]
 [&amp;lt;ffffffffa0a281d7&amp;gt;] ll_getattr+0x37/0x40 [lustre]
 [&amp;lt;ffffffff81180571&amp;gt;] vfs_getattr+0x51/0x80
 [&amp;lt;ffffffff8118082f&amp;gt;] vfs_fstat+0x3f/0x60
 [&amp;lt;ffffffff81180874&amp;gt;] sys_newfstat+0x24/0x40
 [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b


ll_imp_inval  D 0000000000000000     0 23859      2 0x00000080
 ffff8800534ebad0 0000000000000046 0000000100000000 0000000000000000
 ffff8800534eba60 ffffffffa04fb262 ffff8800534ebac0 0000000000000001
 ffff88005337d058 ffff8800534ebfd8 000000000000fb88 ffff88005337d058
Call Trace:
 [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
 [&amp;lt;ffffffffa090224a&amp;gt;] osc_ldlm_blocking_ast+0x7a/0x380 [osc]
 [&amp;lt;ffffffffa076a480&amp;gt;] ldlm_cancel_callback+0x60/0x100 [ptlrpc]
 [&amp;lt;ffffffffa0778c1b&amp;gt;] ldlm_cli_cancel_local+0x7b/0x380 [ptlrpc]
 [&amp;lt;ffffffffa077cae8&amp;gt;] ldlm_cli_cancel+0x58/0x3a0 [ptlrpc]
 [&amp;lt;ffffffffa0772958&amp;gt;] cleanup_resource+0x168/0x300 [ptlrpc]
 [&amp;lt;ffffffffa0772b1a&amp;gt;] ldlm_resource_clean+0x2a/0x50 [ptlrpc]
 [&amp;lt;ffffffffa04fc97f&amp;gt;] cfs_hash_for_each_relax+0x17f/0x380 [libcfs]
 [&amp;lt;ffffffffa04fe3bf&amp;gt;] cfs_hash_for_each_nolock+0x7f/0x1c0 [libcfs]
 [&amp;lt;ffffffffa076f629&amp;gt;] ldlm_namespace_cleanup+0x29/0xb0 [ptlrpc]
 [&amp;lt;ffffffffa08f5f11&amp;gt;] osc_import_event+0x5a1/0x19b0 [osc]
 [&amp;lt;ffffffffa07c1b69&amp;gt;] ptlrpc_invalidate_import+0x2a9/0x8d0 [ptlrpc]
 [&amp;lt;ffffffffa07c23ff&amp;gt;] ptlrpc_invalidate_import_thread+0x4f/0x2f0 [ptlrpc]
 [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="43724" author="laisiyao" created="Fri, 24 Aug 2012 03:01:12 +0000"  >&lt;p&gt;There appears to be a deadlock between the processes below:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;
ldlm_bl_12    D 0000000000000000     0  9667      2 0x00000080
 ffff880053011ad8 0000000000000046 000000000027a41e ffff880078a56030
 ffff880078a57000 ffff880053011a90 ffffffff81039678 ffff8800ffffffff
 ffff88005300fab8 ffff880053011fd8 000000000000fb88 ffff88005300fab8
Call Trace:
 [&amp;lt;ffffffff814ffec5&amp;gt;] rwsem_down_failed_common+0x95/0x1d0
 [&amp;lt;ffffffff81500056&amp;gt;] rwsem_down_read_failed+0x26/0x30
 [&amp;lt;ffffffff8127e664&amp;gt;] call_rwsem_down_read_failed+0x14/0x30
 [&amp;lt;ffffffffa09a4f04&amp;gt;] lov_lsm_addref+0x34/0x150 [lov]
 [&amp;lt;ffffffffa09a56e5&amp;gt;] lov_io_init+0x75/0x1c0 [lov]
 [&amp;lt;ffffffffa0659378&amp;gt;] cl_io_init0+0x98/0x160 [obdclass]
 [&amp;lt;ffffffffa065c264&amp;gt;] cl_io_init+0x64/0x100 [obdclass]
 [&amp;lt;ffffffffa0653fa4&amp;gt;] cl_lock_discard_pages+0x64/0x1f0 [obdclass]
 [&amp;lt;ffffffffa09004e0&amp;gt;] osc_lock_flush+0x110/0x200 [osc]
 [&amp;lt;ffffffffa0900629&amp;gt;] osc_lock_cancel+0x59/0x1a0 [osc]
 [&amp;lt;ffffffffa0651dc5&amp;gt;] cl_lock_cancel0+0x75/0x160 [obdclass]
 [&amp;lt;ffffffffa0652a2b&amp;gt;] cl_lock_cancel+0x13b/0x140 [obdclass]
 [&amp;lt;ffffffffa090230a&amp;gt;] osc_ldlm_blocking_ast+0x13a/0x380 [osc]
 [&amp;lt;ffffffffa077fcd3&amp;gt;] ldlm_handle_bl_callback+0x123/0x2e0 [ptlrpc]
 [&amp;lt;ffffffffa0780111&amp;gt;] ldlm_bl_thread_main+0x281/0x3d0 [ptlrpc]
 [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
mv            D 0000000000000000     0 23117      1 0x00000080
 ffff88004078d928 0000000000000086 00000000ffffffff 000000000078d908
 ffff880000000030 ffff88004078d9a8 ffff88004078d938 ffffffff81039678
 ffff880078895058 ffff88004078dfd8 000000000000fb88 ffff880078895058
Call Trace:
 [&amp;lt;ffffffff814fefbe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
 [&amp;lt;ffffffff814fee5b&amp;gt;] mutex_lock+0x2b/0x50
 [&amp;lt;ffffffffa06545b7&amp;gt;] cl_lock_mutex_get+0x77/0xe0 [obdclass]
 [&amp;lt;ffffffffa0657b76&amp;gt;] cl_lock_hold_mutex+0x96/0x6c0 [obdclass]
 [&amp;lt;ffffffffa06581d3&amp;gt;] cl_lock_hold+0x33/0x100 [obdclass]
 [&amp;lt;ffffffffa09abc9d&amp;gt;] lov_sublock_alloc+0x11d/0x470 [lov]
 [&amp;lt;ffffffffa09acc04&amp;gt;] lov_lock_init_raid0+0x3e4/0xed0 [lov]
 [&amp;lt;ffffffffa09a5338&amp;gt;] lov_lock_init+0x68/0xe0 [lov]
 [&amp;lt;ffffffffa0657e7c&amp;gt;] cl_lock_hold_mutex+0x39c/0x6c0 [obdclass]
 [&amp;lt;ffffffffa0658302&amp;gt;] cl_lock_request+0x62/0x280 [obdclass]
 [&amp;lt;ffffffffa0a6a6bb&amp;gt;] cl_glimpse_lock+0x17b/0x4a0 [lustre]
 [&amp;lt;ffffffffa0a6af47&amp;gt;] cl_glimpse_size0+0x187/0x190 [lustre]
 [&amp;lt;ffffffffa0a27f62&amp;gt;] ll_inode_revalidate_it+0xf2/0x1c0 [lustre]
 [&amp;lt;ffffffffa0a28079&amp;gt;] ll_getattr_it+0x49/0x170 [lustre]
 [&amp;lt;ffffffffa0a281d7&amp;gt;] ll_getattr+0x37/0x40 [lustre]
 [&amp;lt;ffffffff81180571&amp;gt;] vfs_getattr+0x51/0x80
 [&amp;lt;ffffffff81180600&amp;gt;] vfs_fstatat+0x60/0x80
 [&amp;lt;ffffffff8118068e&amp;gt;] vfs_lstat+0x1e/0x20
 [&amp;lt;ffffffff811806b4&amp;gt;] sys_newlstat+0x24/0x50
 [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Process ldlm_bl_12:&lt;br/&gt;
cl_lock_mutex_lock in osc_ldlm_blocking_ast() -&amp;gt; lov-&amp;gt;lo_type_guard in lov_lsm_addref().&lt;/p&gt;

&lt;p&gt;Process mv:&lt;br/&gt;
lov-&amp;gt;lo_type_guard in lov_lock_init() -&amp;gt; cl_lock_mutex_lock in cl_lock_hold_mutex().&lt;/p&gt;

&lt;p&gt;Due to the complexity of CLIO locking, Jinshan, could you give some advice on the fix?&lt;/p&gt;</comment>
                            <comment id="43731" author="jlevi" created="Fri, 24 Aug 2012 09:13:18 +0000"  >&lt;p&gt;Lai has not been able to reproduce this yet, but believes it is a CLIO bug. He will talk to Jinshan to try to determine fix.&lt;/p&gt;</comment>
                            <comment id="43808" author="jay" created="Mon, 27 Aug 2012 12:39:16 +0000"  >&lt;p&gt;I think the problem is due to lov_layout_wait() in lov_conf_set(). It sleeps while holding lo_type_guard. The proper fix would be to not hold type guard while sleeping.&lt;/p&gt;</comment>
                            <comment id="43868" author="laisiyao" created="Tue, 28 Aug 2012 09:33:48 +0000"  >&lt;p&gt;The deadlock is as below:&lt;br/&gt;
1. Process mv takes lov-&amp;gt;lo_type_guard read semaphore.&lt;br/&gt;
2. Process ldlm_bl_12 takes cl_lock_mutex_lock.&lt;br/&gt;
3. Process mkdir enqueues lov-&amp;gt;lo_type_guard write semaphore (step 1 has taken read lock).&lt;br/&gt;
4. Process ldlm_bl_12 enqueues lov-&amp;gt;lo_type_guard read semaphore (because of rw_semaphore fairness, a read request is blocked if a write is already enqueued).&lt;br/&gt;
5. Process mv tries to take cl_lock_mutex_lock.&lt;/p&gt;

&lt;p&gt;Then the three processes deadlocked. We&apos;ll try to give a fix tomorrow.&lt;/p&gt;</comment>
                            <comment id="43917" author="laisiyao" created="Tue, 28 Aug 2012 23:48:42 +0000"  >&lt;p&gt;review is on &lt;a href=&quot;http://review.whamcloud.com/#change,3807&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3807&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="44082" author="pjones" created="Sun, 2 Sep 2012 15:06:36 +0000"  >&lt;p&gt;Landed for 2.3 and 2.4&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv4r3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4303</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>