<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:07:07 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7232] racer deadlock</title>
                <link>https://jira.whamcloud.com/browse/LU-7232</link>
                <project id="10000" key="LU">Lustre</project>
<description>&lt;p&gt;I am hitting deadlocks in racer now (no DNE) that started very recently, though nothing in the affected path seems to have changed for quite a while which is really strange.&lt;/p&gt;

&lt;p&gt;Traces look like this:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[282420.297213] LNet: Service thread pid 19612 was inactive for 62.01s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[282420.298976] Pid: 19612, comm: mdt01_009
[282420.301584] 
[282420.301585] Call Trace:
[282420.302572]  [&amp;lt;ffffffffa189d750&amp;gt;] ? _ldlm_lock_debug+0x300/0x690 [ptlrpc]
[282420.303111]  [&amp;lt;ffffffff81530c14&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
[282420.307584]  [&amp;lt;ffffffff8152d911&amp;gt;] schedule_timeout+0x191/0x2e0
[282420.308111]  [&amp;lt;ffffffff81088290&amp;gt;] ? process_timeout+0x0/0x10
[282420.309183]  [&amp;lt;ffffffffa18beac0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc]
[282420.310721]  [&amp;lt;ffffffffa18c3709&amp;gt;] ldlm_completion_ast+0x609/0x9b0 [ptlrpc]
[282420.311557]  [&amp;lt;ffffffff81063a80&amp;gt;] ? default_wake_function+0x0/0x20
[282420.312337]  [&amp;lt;ffffffffa18c2a6e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x8b0 [ptlrpc]
[282420.313710]  [&amp;lt;ffffffffa18c3100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[282420.314663]  [&amp;lt;ffffffffa09f30d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[282420.315355]  [&amp;lt;ffffffffa09ff63b&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
[282420.316058]  [&amp;lt;ffffffffa09f30d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[282420.316914]  [&amp;lt;ffffffffa18c3100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[282420.317818]  [&amp;lt;ffffffffa0f750f1&amp;gt;] ? lu_object_find_at+0xb1/0xe0 [obdclass]
[282420.318513]  [&amp;lt;ffffffffa0a0026f&amp;gt;] mdt_object_lock_internal+0x5f/0x2d0 [mdt]
[282420.319335]  [&amp;lt;ffffffffa0a005a1&amp;gt;] mdt_object_lock+0x11/0x20 [mdt]
[282420.320011]  [&amp;lt;ffffffffa0a18371&amp;gt;] mdt_reint_unlink+0x831/0x10e0 [mdt]
[282420.320838]  [&amp;lt;ffffffffa0f90100&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
[282420.321531]  [&amp;lt;ffffffffa09f2245&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
[282420.322188]  [&amp;lt;ffffffffa0a0b71c&amp;gt;] ? mdt_root_squash+0x2c/0x3f0 [mdt]
[282420.323033]  [&amp;lt;ffffffffa0a0f85d&amp;gt;] mdt_reint_rec+0x5d/0x200 [mdt]
[282420.323843]  [&amp;lt;ffffffffa09f8c1b&amp;gt;] mdt_reint_internal+0x62b/0xa40 [mdt]
[282420.324529]  [&amp;lt;ffffffffa09f94cb&amp;gt;] mdt_reint+0x6b/0x120 [mdt]
[282420.325248]  [&amp;lt;ffffffffa195685c&amp;gt;] tgt_request_handle+0x8bc/0x12e0 [ptlrpc]
[282420.326144]  [&amp;lt;ffffffffa1901b74&amp;gt;] ptlrpc_main+0xd74/0x1850 [ptlrpc]
[282420.327006]  [&amp;lt;ffffffffa1900e00&amp;gt;] ? ptlrpc_main+0x0/0x1850 [ptlrpc]
[282420.327769]  [&amp;lt;ffffffff8109f82e&amp;gt;] kthread+0x9e/0xc0
[282420.328515]  [&amp;lt;ffffffff8100c2ca&amp;gt;] child_rip+0xa/0x20
[282420.333000]  [&amp;lt;ffffffff8109f790&amp;gt;] ? kthread+0x0/0xc0
[282420.333702]  [&amp;lt;ffffffff8100c2c0&amp;gt;] ? child_rip+0x0/0x20
[282420.334388] 
[282420.341475] LustreError: dumping log to /tmp/lustre-log.1443344508.19612
[282422.536531] LNet: Service thread pid 28020 was inactive for 62.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[282422.538633] Pid: 28020, comm: mdt00_000
[282422.539095] 
[282422.539095] Call Trace:
[282422.550545]  [&amp;lt;ffffffffa189d750&amp;gt;] ? _ldlm_lock_debug+0x300/0x690 [ptlrpc]
[282422.551181]  [&amp;lt;ffffffff81530c14&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
[282422.551798]  [&amp;lt;ffffffff8152d911&amp;gt;] schedule_timeout+0x191/0x2e0
[282422.552330]  [&amp;lt;ffffffff81088290&amp;gt;] ? process_timeout+0x0/0x10
[282422.552959]  [&amp;lt;ffffffffa18beac0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc]
[282422.554046]  [&amp;lt;ffffffffa18c3709&amp;gt;] ldlm_completion_ast+0x609/0x9b0 [ptlrpc]
[282422.554605]  [&amp;lt;ffffffff81063a80&amp;gt;] ? default_wake_function+0x0/0x20
[282422.555195]  [&amp;lt;ffffffffa18c2a6e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x8b0 [ptlrpc]
[282422.556246]  [&amp;lt;ffffffffa18c3100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[282422.557344]  [&amp;lt;ffffffffa09f30d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[282422.557890]  [&amp;lt;ffffffffa09ff63b&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
[282422.558695]  [&amp;lt;ffffffffa09f30d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[282422.559262]  [&amp;lt;ffffffffa18c3100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[282422.559915]  [&amp;lt;ffffffffa0f750f1&amp;gt;] ? lu_object_find_at+0xb1/0xe0 [obdclass]
[282422.560732]  [&amp;lt;ffffffffa0a0026f&amp;gt;] mdt_object_lock_internal+0x5f/0x2d0 [mdt]
[282422.561378]  [&amp;lt;ffffffffa0a005a1&amp;gt;] mdt_object_lock+0x11/0x20 [mdt]
[282422.561941]  [&amp;lt;ffffffffa0a06ee6&amp;gt;] mdt_getattr_name_lock+0xf16/0x1910 [mdt]
[282422.562501]  [&amp;lt;ffffffffa0a0be99&amp;gt;] ? old_init_ucred+0x1b9/0x390 [mdt]
[282422.563036]  [&amp;lt;ffffffffa0a07e02&amp;gt;] mdt_intent_getattr+0x292/0x470 [mdt]
[282422.563592]  [&amp;lt;ffffffffa09f7694&amp;gt;] mdt_intent_policy+0x494/0xc40 [mdt]
[282422.564269]  [&amp;lt;ffffffffa18a311f&amp;gt;] ldlm_lock_enqueue+0x12f/0x860 [ptlrpc]
[282422.564834]  [&amp;lt;ffffffffa0844c01&amp;gt;] ? cfs_hash_for_each_enter+0x11/0xa0 [libcfs]
[282422.565795]  [&amp;lt;ffffffffa18cf067&amp;gt;] ldlm_handle_enqueue0+0x807/0x1580 [ptlrpc]
[282422.566376]  [&amp;lt;ffffffffa1943c61&amp;gt;] ? tgt_lookup_reply+0x31/0x190 [ptlrpc]
[282422.566950]  [&amp;lt;ffffffffa1955dd1&amp;gt;] tgt_enqueue+0x61/0x230 [ptlrpc]
[282422.567501]  [&amp;lt;ffffffffa195685c&amp;gt;] tgt_request_handle+0x8bc/0x12e0 [ptlrpc]
[282422.568062]  [&amp;lt;ffffffffa1901b74&amp;gt;] ptlrpc_main+0xd74/0x1850 [ptlrpc]
[282422.568619]  [&amp;lt;ffffffffa1900e00&amp;gt;] ? ptlrpc_main+0x0/0x1850 [ptlrpc]
[282422.569140]  [&amp;lt;ffffffff8109f82e&amp;gt;] kthread+0x9e/0xc0
[282422.569757]  [&amp;lt;ffffffff8100c2ca&amp;gt;] child_rip+0xa/0x20
[282422.570246]  [&amp;lt;ffffffff8109f790&amp;gt;] ? kthread+0x0/0xc0
[282422.570836]  [&amp;lt;ffffffff8100c2c0&amp;gt;] ? child_rip+0x0/0x20
[282422.571377] 
[282422.571926] LustreError: dumping log to /tmp/lustre-log.1443344510.28020
...
[283258.816371] LustreError: 0:0:(ldlm_lockd.c:342:waiting_locks_callback()) ### lock callback timer expired after 901s: evicting client at 0@lo  ns: mdt-lustre-MDT0000_UUID lock: ffff8800b7997db8/0x116f98c7376ef4db lrc: 3/0,0 mode: PR/PR res: [0x200000402:0x256c:0x0].0 bits 0x1b rrc: 3 type: IBT flags: 0x60200000000020 nid: 0@lo remote: 0x116f98c7376ef4b1 expref: 1380 pid: 28373 timeout: 4365706867 lvb_type: 0
[283258.831059] LustreError: 28025:0:(ldlm_lockd.c:1404:ldlm_handle_enqueue0()) ### lock on destroyed export ffff880070a307f0 ns: mdt-lustre-MDT0000_UUID lock: ffff880036b72db8/0x116f98c737716a4f lrc: 3/0,0 mode: PR/PR res: [0x200000401:0x25ee:0x0].0 bits 0x13 rrc: 18 type: IBT flags: 0x50200000000000 nid: 0@lo remote: 0x116f98c737716a33 expref: 373 pid: 28025 timeout: 0 lvb_type: 0
[283258.831132] Lustre: 19612:0:(service.c:2097:ptlrpc_server_handle_request()) @@@ Request took longer than estimated (600:301s); client may timeout.  req@ffff88008508ace8 x1513454483028456/t4295116216(0) o36-&amp;gt;b54de1b7-f374-b4f0-2bb1-81a9b6732569@0@lo:221/0 lens 608/424 e 23 to 0 dl 1443345046 ref 1 fl Complete:/0/0 rc 0/0
[283258.832328] LNet: Service thread pid 19612 completed after 900.54s. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This actually might indicate that something on the client is holding the lock, I guess.&lt;/p&gt;

&lt;p&gt;Another instance:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[14352.684182] Pid: 32225, comm: mdt00_005
[14352.685471] 
[14352.685473] Call Trace:
[14352.687868]  [&amp;lt;ffffffffa13ac750&amp;gt;] ? _ldlm_lock_debug+0x300/0x690 [ptlrpc]
[14352.689215]  [&amp;lt;ffffffff81530c14&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
[14352.690435]  [&amp;lt;ffffffff8152d911&amp;gt;] schedule_timeout+0x191/0x2e0
[14352.691589]  [&amp;lt;ffffffff81088290&amp;gt;] ? process_timeout+0x0/0x10
[14352.692847]  [&amp;lt;ffffffffa13cdac0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc]
[14352.695227]  [&amp;lt;ffffffffa13d2709&amp;gt;] ldlm_completion_ast+0x609/0x9b0 [ptlrpc]
[14352.696517]  [&amp;lt;ffffffff81063a80&amp;gt;] ? default_wake_function+0x0/0x20
[14352.697885]  [&amp;lt;ffffffffa13d1a6e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x8b0 [ptlrpc]
[14352.699005]  [&amp;lt;ffffffffa13d2100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[14352.700185]  [&amp;lt;ffffffffa08a00d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[14352.701376]  [&amp;lt;ffffffffa08ac63b&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
[14352.702496]  [&amp;lt;ffffffffa08a00d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[14352.703696]  [&amp;lt;ffffffffa13d2100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[14352.704971]  [&amp;lt;ffffffffa11e00f1&amp;gt;] ? lu_object_find_at+0xb1/0xe0 [obdclass]
[14352.707110]  [&amp;lt;ffffffffa08ad26f&amp;gt;] mdt_object_lock_internal+0x5f/0x2d0 [mdt]
[14352.708140]  [&amp;lt;ffffffffa08ad5a1&amp;gt;] mdt_object_lock+0x11/0x20 [mdt]
[14352.709256]  [&amp;lt;ffffffffa08c5371&amp;gt;] mdt_reint_unlink+0x831/0x10e0 [mdt]
[14352.710261]  [&amp;lt;ffffffffa11fb100&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
[14352.711259]  [&amp;lt;ffffffffa089f245&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
[14352.712261]  [&amp;lt;ffffffffa08b871c&amp;gt;] ? mdt_root_squash+0x2c/0x3f0 [mdt]
[14352.713320]  [&amp;lt;ffffffffa08bc85d&amp;gt;] mdt_reint_rec+0x5d/0x200 [mdt]
[14352.714268]  [&amp;lt;ffffffffa08a5c1b&amp;gt;] mdt_reint_internal+0x62b/0xa40 [mdt]
[14352.715290]  [&amp;lt;ffffffffa08a64cb&amp;gt;] mdt_reint+0x6b/0x120 [mdt]
[14352.716257]  [&amp;lt;ffffffffa146585c&amp;gt;] tgt_request_handle+0x8bc/0x12e0 [ptlrpc]
[14352.717317]  [&amp;lt;ffffffffa1410b74&amp;gt;] ptlrpc_main+0xd74/0x1850 [ptlrpc]
[14352.718264]  [&amp;lt;ffffffffa140fe00&amp;gt;] ? ptlrpc_main+0x0/0x1850 [ptlrpc]
[14352.719187]  [&amp;lt;ffffffff8109f82e&amp;gt;] kthread+0x9e/0xc0
[14352.720006]  [&amp;lt;ffffffff8100c2ca&amp;gt;] child_rip+0xa/0x20
[14352.720856]  [&amp;lt;ffffffff8109f790&amp;gt;] ? kthread+0x0/0xc0
[14352.721748]  [&amp;lt;ffffffff8100c2c0&amp;gt;] ? child_rip+0x0/0x20
[14352.722571] 
[14352.723270] LustreError: dumping log to /tmp/lustre-log.1443076451.32225
[14352.860186] Pid: 30999, comm: mdt00_000
[14352.860651] 
[14352.860652] Call Trace:
[14352.861604]  [&amp;lt;ffffffffa13ac750&amp;gt;] ? _ldlm_lock_debug+0x300/0x690 [ptlrpc]
[14352.862129]  [&amp;lt;ffffffff81530c14&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
[14352.862628]  [&amp;lt;ffffffff8152d911&amp;gt;] schedule_timeout+0x191/0x2e0
[14352.863133]  [&amp;lt;ffffffff81088290&amp;gt;] ? process_timeout+0x0/0x10
[14352.863656]  [&amp;lt;ffffffffa13cdac0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc]
[14352.864646]  [&amp;lt;ffffffffa13d2709&amp;gt;] ldlm_completion_ast+0x609/0x9b0 [ptlrpc]
[14352.865201]  [&amp;lt;ffffffff81063a80&amp;gt;] ? default_wake_function+0x0/0x20
[14352.865735]  [&amp;lt;ffffffffa13d1a6e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x8b0 [ptlrpc]
[14352.866279]  [&amp;lt;ffffffffa13d2100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[14352.866815]  [&amp;lt;ffffffffa08a00d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[14352.867337]  [&amp;lt;ffffffffa08ac63b&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
[14352.867861]  [&amp;lt;ffffffffa08a00d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[14352.868392]  [&amp;lt;ffffffffa13d2100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[14352.868942]  [&amp;lt;ffffffffa11e00f1&amp;gt;] ? lu_object_find_at+0xb1/0xe0 [obdclass]
[14352.869511]  [&amp;lt;ffffffffa08ad26f&amp;gt;] mdt_object_lock_internal+0x5f/0x2d0 [mdt]
[14352.870048]  [&amp;lt;ffffffffa08ad5a1&amp;gt;] mdt_object_lock+0x11/0x20 [mdt]
[14352.870577]  [&amp;lt;ffffffffa08b3ee6&amp;gt;] mdt_getattr_name_lock+0xf16/0x1910 [mdt]
[14352.871098]  [&amp;lt;ffffffffa08b8e99&amp;gt;] ? old_init_ucred+0x1b9/0x390 [mdt]
[14352.871632]  [&amp;lt;ffffffffa08b4e02&amp;gt;] mdt_intent_getattr+0x292/0x470 [mdt]
[14352.872171]  [&amp;lt;ffffffffa08a4694&amp;gt;] mdt_intent_policy+0x494/0xc40 [mdt]
[14352.872731]  [&amp;lt;ffffffffa13b211f&amp;gt;] ldlm_lock_enqueue+0x12f/0x860 [ptlrpc]
[14352.873280]  [&amp;lt;ffffffffa10d6c01&amp;gt;] ? cfs_hash_for_each_enter+0x11/0xa0 [libcfs]
[14352.874264]  [&amp;lt;ffffffffa13de067&amp;gt;] ldlm_handle_enqueue0+0x807/0x1580 [ptlrpc]
[14352.874852]  [&amp;lt;ffffffffa1452c61&amp;gt;] ? tgt_lookup_reply+0x31/0x190 [ptlrpc]
[14352.875387]  [&amp;lt;ffffffffa1464dd1&amp;gt;] tgt_enqueue+0x61/0x230 [ptlrpc]
[14352.875916]  [&amp;lt;ffffffffa146585c&amp;gt;] tgt_request_handle+0x8bc/0x12e0 [ptlrpc]
[14352.876447]  [&amp;lt;ffffffffa1410b74&amp;gt;] ptlrpc_main+0xd74/0x1850 [ptlrpc]
[14352.876972]  [&amp;lt;ffffffffa140fe00&amp;gt;] ? ptlrpc_main+0x0/0x1850 [ptlrpc]
[14352.877486]  [&amp;lt;ffffffff8109f82e&amp;gt;] kthread+0x9e/0xc0
[14352.878027]  [&amp;lt;ffffffff8100c2ca&amp;gt;] child_rip+0xa/0x20
[14352.878501]  [&amp;lt;ffffffff8109f790&amp;gt;] ? kthread+0x0/0xc0
[14352.878999]  [&amp;lt;ffffffff8100c2c0&amp;gt;] ? child_rip+0x0/0x20
[14352.879508] 
[14352.904289] Pid: 32083, comm: mdt00_004
[14352.905061] 
[14352.905062] Call Trace:
[14352.906744]  [&amp;lt;ffffffffa13ac750&amp;gt;] ? _ldlm_lock_debug+0x300/0x690 [ptlrpc]
[14352.907771]  [&amp;lt;ffffffff81530c14&amp;gt;] ? _spin_lock_irqsave+0x24/0x30
[14352.908749]  [&amp;lt;ffffffff8152d911&amp;gt;] schedule_timeout+0x191/0x2e0
[14352.909837]  [&amp;lt;ffffffff81088290&amp;gt;] ? process_timeout+0x0/0x10
[14352.929740]  [&amp;lt;ffffffffa13cdac0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x370 [ptlrpc]
[14352.931364]  [&amp;lt;ffffffffa13d2709&amp;gt;] ldlm_completion_ast+0x609/0x9b0 [ptlrpc]
[14352.932198]  [&amp;lt;ffffffff81063a80&amp;gt;] ? default_wake_function+0x0/0x20
[14352.933066]  [&amp;lt;ffffffffa13d1a6e&amp;gt;] ldlm_cli_enqueue_local+0x21e/0x8b0 [ptlrpc]
[14352.934005]  [&amp;lt;ffffffffa13d2100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[14352.934895]  [&amp;lt;ffffffffa08a00d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[14352.935734]  [&amp;lt;ffffffffa08ac63b&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
[14352.936567]  [&amp;lt;ffffffffa08a00d0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[14352.937429]  [&amp;lt;ffffffffa13d2100&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[14352.938325]  [&amp;lt;ffffffffa11e00f1&amp;gt;] ? lu_object_find_at+0xb1/0xe0 [obdclass]
[14352.939119]  [&amp;lt;ffffffffa08ad26f&amp;gt;] mdt_object_lock_internal+0x5f/0x2d0 [mdt]
[14352.939954]  [&amp;lt;ffffffffa08ad5a1&amp;gt;] mdt_object_lock+0x11/0x20 [mdt]
[14352.940772]  [&amp;lt;ffffffffa08b3ee6&amp;gt;] mdt_getattr_name_lock+0xf16/0x1910 [mdt]
[14352.941575]  [&amp;lt;ffffffffa08b8e99&amp;gt;] ? old_init_ucred+0x1b9/0x390 [mdt]
[14352.942437]  [&amp;lt;ffffffffa08b4e02&amp;gt;] mdt_intent_getattr+0x292/0x470 [mdt]
[14352.943210]  [&amp;lt;ffffffffa08a4694&amp;gt;] mdt_intent_policy+0x494/0xc40 [mdt]
[14352.944072]  [&amp;lt;ffffffffa13b211f&amp;gt;] ldlm_lock_enqueue+0x12f/0x860 [ptlrpc]
[14352.944902]  [&amp;lt;ffffffffa10d6c01&amp;gt;] ? cfs_hash_for_each_enter+0x11/0xa0 [libcfs]
[14352.946718]  [&amp;lt;ffffffffa13de067&amp;gt;] ldlm_handle_enqueue0+0x807/0x1580 [ptlrpc]
[14352.947607]  [&amp;lt;ffffffffa1452c61&amp;gt;] ? tgt_lookup_reply+0x31/0x190 [ptlrpc]
[14352.948420]  [&amp;lt;ffffffffa1464dd1&amp;gt;] tgt_enqueue+0x61/0x230 [ptlrpc]
[14352.949236]  [&amp;lt;ffffffffa146585c&amp;gt;] tgt_request_handle+0x8bc/0x12e0 [ptlrpc]
[14352.950061]  [&amp;lt;ffffffffa1410b74&amp;gt;] ptlrpc_main+0xd74/0x1850 [ptlrpc]
[14352.950873]  [&amp;lt;ffffffffa140fe00&amp;gt;] ? ptlrpc_main+0x0/0x1850 [ptlrpc]
[14352.951644]  [&amp;lt;ffffffff8109f82e&amp;gt;] kthread+0x9e/0xc0
[14352.952376]  [&amp;lt;ffffffff8100c2ca&amp;gt;] child_rip+0xa/0x20
[14352.953105]  [&amp;lt;ffffffff8109f790&amp;gt;] ? kthread+0x0/0xc0
[14352.953879]  [&amp;lt;ffffffff8100c2c0&amp;gt;] ? child_rip+0x0/0x20
[14352.954613] 
...
[14885.680137] Lustre: 2487:0:(service.c:1336:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/5), not sending early reply
[14885.680141]   req@ffff880079253ce8 x1513175160155132/t0(0) o36-&amp;gt;a8b0df4a-0659-2218-5d9c-1d72e2e0bf80@0@lo:189/0 lens 608/3128 e 23 to 0 dl 1443076989 ref 2 fl Interpret:/0/0 rc 0/0
[14891.680180] Lustre: 28828:0:(client.c:2039:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1443076389/real 1443076389]  req@ffff88002fe14ce8 x1513175160155132/t0(0) o36-&amp;gt;lustre-MDT0000-mdc-ffff8800871fa7f0@0@lo:12/10 lens 608/856 e 23 to 1 dl 1443076990 ref 2 fl Rpc:X/0/ffffffff rc 0/-1
[14891.688920] Lustre: lustre-MDT0000-mdc-ffff8800871fa7f0: Connection to lustre-MDT0000 (at 0@lo) was lost; in progress operations using this service will wait for recovery to complete
[14891.693285] Lustre: lustre-MDT0000: Client a8b0df4a-0659-2218-5d9c-1d72e2e0bf80 (at 0@lo) reconnecting
[14891.695699] Lustre: lustre-MDT0000: Connection restored to 192.168.10.218@tcp (at 0@lo)
[14891.697793] Lustre: Skipped 5 previous similar messages
[15190.816179] LustreError: 0:0:(ldlm_lockd.c:342:waiting_locks_callback()) ### lock callback timer expired after 900s: evicting client at 0@lo  ns: mdt-lustre-MDT0000_UUID lock: ffff8800140f0db8/0x24d4ac7f4cf853c0 lrc: 3/0,0 mode: PR/PR res: [0x200000401:0xcc7:0x0].0 bits 0x1b rrc: 3 type: IBT flags: 0x60200000000020 nid: 0@lo remote: 0x24d4ac7f4cf8538f expref: 431 pid: 32765 timeout: 4298689966 lvb_type: 0
[15190.835968] Lustre: 32225:0:(service.c:2097:ptlrpc_server_handle_request()) @@@ Request took longer than estimated (600:300s); client may timeout.  req@ffff880079253ce8 x1513175160155132/t4295000184(0) o36-&amp;gt;a8b0df4a-0659-2218-5d9c-1d72e2e0bf80@0@lo:189/0 lens 608/424 e 23 to 0 dl 1443076989 ref 1 fl Complete:/0/0 rc 0/0
[15190.837500] LustreError: 30999:0:(ldlm_lockd.c:1404:ldlm_handle_enqueue0()) ### lock on destroyed export ffff88009a5237f0 ns: mdt-lustre-MDT0000_UUID lock: ffff8800198b2db8/0x24d4ac7f4cf86bde lrc: 3/0,0 mode: PR/PR res: [0x200000401:0xc2b:0x0].0 bits 0x13 rrc: 2 type: IBT flags: 0x50200000000000 nid: 0@lo remote: 0x24d4ac7f4cf86bc2 expref: 5 pid: 30999 timeout: 0 lvb_type: 0
[15190.837757] LustreError: 11-0: lustre-MDT0000-mdc-ffff8800871fa7f0: operation ldlm_enqueue to node 0@lo failed: rc = -107
[15190.837770] Lustre: lustre-MDT0000-mdc-ffff8800871fa7f0: Connection to lustre-MDT0000 (at 0@lo) was lost; in progress operations using this service will wait for recovery to complete
[15190.837776] Lustre: Skipped 1 previous similar message
[15190.837864] LNet: Service thread pid 30999 completed after 900.01s. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).
[15190.838528] Lustre: lustre-MDT0000: Connection restored to 192.168.10.218@tcp (at 0@lo)
[15190.838532] Lustre: Skipped 3 previous similar messages
[15190.838756] LustreError: 167-0: lustre-MDT0000-mdc-ffff8800871fa7f0: This client was evicted by lustre-MDT0000; in progress operations using this service will fail.
[15190.842929] LustreError: 29000:0:(lmv_obd.c:1323:lmv_fid_alloc()) Can&apos;t alloc new fid, rc -19
[15190.850803] LustreError: 29117:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800871fa7f0: inode [0x200000402:0xcee:0x0] mdc close failed: rc = -108
[15190.850806] LustreError: 29117:0:(file.c:184:ll_close_inode_openhandle()) Skipped 2 previous similar messages
[15190.853624] LustreError: 29065:0:(mdc_request.c:1283:mdc_read_page()) lustre-MDT0000-mdc-ffff8800871fa7f0: [0x200000401:0x1:0x0] lock enqueue fails: rc = -108
[15190.858848] LustreError: 29117:0:(ldlm_resource.c:887:ldlm_resource_complain()) lustre-MDT0000-mdc-ffff8800871fa7f0: namespace resource [0x200000401:0xcc7:0x0].0 (ffff880006543ef8) refcount nonzero (1) after lock cleanup; forcing cleanup.
[15190.858853] LustreError: 29117:0:(ldlm_resource.c:1502:ldlm_resource_dump()) --- Resource: [0x200000401:0xcc7:0x0].0 (ffff880006543ef8) refcount = 2
[15190.858855] LustreError: 29117:0:(ldlm_resource.c:1505:ldlm_resource_dump()) Granted locks (in reverse order):
[15190.858861] LustreError: 29117:0:(ldlm_resource.c:1508:ldlm_resource_dump()) ### ### ns: lustre-MDT0000-mdc-ffff8800871fa7f0 lock: ffff88003fd3ddb8/0x24d4ac7f4cf8538f lrc: 2/1,0 mode: PR/PR res: [0x200000401:0xcc7:0x0].0 bits 0x1b rrc: 2 type: IBT flags: 0x526400000000 nid: local remote: 0x24d4ac7f4cf853c0 expref: -99 pid: 28865 timeout: 0 lvb_type: 3
[15190.869860] Lustre: 32225:0:(service.c:2097:ptlrpc_server_handle_request()) Skipped 2 previous similar messages
[15197.481908] LustreError: 31219:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800871fa7f0: inode [0x200000402:0xdd4:0x0] mdc close failed: rc = -13
[15197.483545] LustreError: 31219:0:(file.c:184:ll_close_inode_openhandle()) Skipped 2 previous similar messages
[15200.905742] LustreError: 817:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000403:0x1f5:0x0]: -16
[15210.349962] LustreError: 4266:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800871fa7f0: inode [0x200000402:0x10e7:0x0] mdc close failed: rc = -13
[15210.369363] LustreError: 4266:0:(file.c:184:ll_close_inode_openhandle()) Skipped 2 previous similar messages
[15228.546840] LustreError: 11861:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000402:0x1477:0x0]: -16
[15253.218613] LustreError: 21121:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800871fa7f0: inode [0x200000403:0xac1:0x0] mdc close failed: rc = -13
[15253.220182] LustreError: 21121:0:(file.c:184:ll_close_inode_openhandle()) Skipped 6 previous similar messages
[15258.216446] LustreError: 23425:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000402:0x1a05:0x0]: -16
[15289.724580] LustreError: 2807:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800b04e17f0: inode [0x200000402:0x1fad:0x0] mdc close failed: rc = -13
[15289.726205] LustreError: 2807:0:(file.c:184:ll_close_inode_openhandle()) Skipped 6 previous similar messages
[15373.365463] LustreError: 2566:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800871fa7f0: inode [0x200000402:0x2e62:0x0] mdc close failed: rc = -13
[15373.367206] LustreError: 2566:0:(file.c:184:ll_close_inode_openhandle()) Skipped 12 previous similar messages
[15395.126758] LustreError: 1585:0:(mdt_handler.c:895:mdt_getattr_internal()) lustre-MDT0000: getattr error for [0x200000403:0x237e:0x0]: rc = -2
[15474.339855] LustreError: 8844:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000402:0x4094:0x0]: -16
[15501.430962] LustreError: 18674:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800871fa7f0: inode [0x200000402:0x441d:0x0] mdc close failed: rc = -13
[15501.433615] LustreError: 18674:0:(file.c:184:ll_close_inode_openhandle()) Skipped 24 previous similar messages
[15648.026942] LustreError: 22568:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000403:0x46d9:0x0]: -16
[15773.082317] LustreError: 21578:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800871fa7f0: inode [0x200000402:0x6332:0x0] mdc close failed: rc = -13
[15773.085200] LustreError: 21578:0:(file.c:184:ll_close_inode_openhandle()) Skipped 37 previous similar messages
[15795.694420] LustreError: 32225:0:(mdd_object.c:70:mdd_la_get()) lustre-MDD0000: object [0x200000402:0x66f6:0x0] not found: rc = -2
[15985.390214] LustreError: 1601:0:(mdd_object.c:70:mdd_la_get()) lustre-MDD0000: object [0x200000402:0x7cf5:0x0] not found: rc = -2
[16285.219995] LustreError: 15290:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800b04e17f0: inode [0x200000403:0x9b4a:0x0] mdc close failed: rc = -13
[16285.223121] LustreError: 15290:0:(file.c:184:ll_close_inode_openhandle()) Skipped 75 previous similar messages
[16329.835537] LustreError: 32323:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000403:0xa3e7:0x0]: -16
[16445.032943] LustreError: 9902:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000403:0xb71a:0x0]: -16
[16506.129728] LustreError: 31397:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000403:0xc166:0x0]: -16
[16575.379907] LustreError: 23646:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000403:0xcce2:0x0]: -16
[16616.252674] LustreError: 6409:0:(lcommon_cl.c:185:cl_file_inode_init()) Failure to initialize cl object [0x200000403:0xd417:0x0]: -16
[16770.245965] 4[29243]: segfault at 8 ip 00007fea27c3c3a3 sp 00007ffef4109070 error 4 in ld-2.12.so[7fea27c31000+20000]
[16771.988668] 17[29822]: segfault at 8 ip 00007fc052daa3a3 sp 00007ffe66d195f0 error 4 in ld-2.12.so[7fc052d9f000+20000]
[16889.764587] LustreError: 7954:0:(file.c:184:ll_close_inode_openhandle()) lustre-clilmv-ffff8800b04e17f0: inode [0x200000402:0x10e7e:0x0] mdc close failed: rc = -13
[16889.766871] LustreError: 7954:0:(file.c:184:ll_close_inode_openhandle()) Skipped 96 previous similar messages
[16928.134762] Lustre: DEBUG MARKER: == racer test complete, duration 2752 sec == 03:17:06 (1443079026)
[16928.705843] Lustre: setting import lustre-MDT0000_UUID INACTIVE by administrator request
[16928.871543] LustreError: 20126:0:(ldlm_resource.c:887:ldlm_resource_complain()) lustre-MDT0000-mdc-ffff8800871fa7f0: namespace resource [0x200000401:0xcc7:0x0].0 (ffff880006543ef8) refcount nonzero (1) after lock cleanup; forcing cleanup.
[16928.874285] LustreError: 20126:0:(ldlm_resource.c:1502:ldlm_resource_dump()) --- Resource: [0x200000401:0xcc7:0x0].0 (ffff880006543ef8) refcount = 2
[16928.875716] LustreError: 20126:0:(ldlm_resource.c:1505:ldlm_resource_dump()) Granted locks (in reverse order):
[16928.876930] LustreError: 20126:0:(ldlm_resource.c:1508:ldlm_resource_dump()) ### ### ns: lustre-MDT0000-mdc-ffff8800871fa7f0 lock: ffff88003fd3ddb8/0x24d4ac7f4cf8538f lrc: 2/1,0 mode: PR/PR res: [0x200000401:0xcc7:0x0].0 bits 0x1b rrc: 2 type: IBT flags: 0x526400000000 nid: local remote: 0x24d4ac7f4cf853c0 expref: -99 pid: 28865 timeout: 0 lvb_type: 3
[16929.220780] LustreError: 20126:0:(ldlm_resource.c:1502:ldlm_resource_dump()) --- Resource: [0x200000401:0xcc7:0x0].0 (ffff880006543ef8) refcount = 2
[16929.222171] LustreError: 20126:0:(ldlm_resource.c:1505:ldlm_resource_dump()) Granted locks (in reverse order):
[16934.220047] LustreError: 0-0: Forced cleanup waiting for lustre-MDT0000-mdc-ffff8800871fa7f0 namespace with 1 resources in use, (rc=-110)
[16939.220157] LustreError: 0-0: Forced cleanup waiting for lustre-MDT0000-mdc-ffff8800871fa7f0 namespace with 1 resources in use, (rc=-110)
[16944.224128] LustreError: 0-0: Forced cleanup waiting for lustre-MDT0000-mdc-ffff8800871fa7f0 namespace with 1 resources in use, (rc=-110)
[16949.228140] LustreError: 0-0: Forced cleanup waiting for lustre-MDT0000-mdc-ffff8800871fa7f0 namespace with 1 resources in use, (rc=-110)
[16954.228154] LustreError: 0-0: Forced cleanup waiting for lustre-MDT0000-mdc-ffff8800871fa7f0 namespace with 1 resources in use, (rc=-110)
[16964.228106] LustreError: 0-0: Forced cleanup waiting for lustre-MDT0000-mdc-ffff8800871fa7f0 namespace with 1 resources in use, (rc=-110)
[16964.231892] LustreError: Skipped 1 previous similar message
[16981.388377] Lustre: lustre-OST0000: haven&apos;t heard from client a8b0df4a-0659-2218-5d9c-1d72e2e0bf80 (at 0@lo) in 55 seconds. I think it&apos;s dead, and I am evicting it. exp ffff8800634167f0, cur 1443079080 expire 1443079050 last 1443079025
[16982.108960] Lustre: lustre-MDT0000: haven&apos;t heard from client a8b0df4a-0659-2218-5d9c-1d72e2e0bf80 (at 0@lo) in 54 seconds. I think it&apos;s dead, and I am evicting it. exp ffff88003feef7f0, cur 1443079080 expire 1443079050 last 1443079026
[16982.112411] Lustre: Skipped 1 previous similar message
[16984.232172] LustreError: 0-0: Forced cleanup waiting for lustre-MDT0000-mdc-ffff8800871fa7f0 namespace with 1 resources in use, (rc=-110)
[16984.236232] LustreError: Skipped 3 previous similar messages
[17019.236099] LustreError: 0-0: Forced cleanup waiting for lustre-MDT0000-mdc-ffff8800871fa7f0 namespace with 1 resources in use, (rc=-110)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The cleanup waiting is happening every time at the end&lt;/p&gt;
</description>
                <environment></environment>
        <key id="32391">LU-7232</key>
            <summary>racer deadlock</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                    </labels>
                <created>Wed, 30 Sep 2015 03:14:04 +0000</created>
                <updated>Thu, 17 Aug 2017 12:43:42 +0000</updated>
                            <resolved>Wed, 28 Oct 2015 15:24:14 +0000</resolved>
                                                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="128934" author="green" created="Wed, 30 Sep 2015 18:30:42 +0000"  >&lt;p&gt;So John did some hunting and thinks the problem was introduced by &lt;a href=&quot;http://review.whamcloud.com/15767&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15767&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="128935" author="jgmitter" created="Wed, 30 Sep 2015 18:32:58 +0000"  >&lt;p&gt;Hi Lai,&lt;br/&gt;
Can you look into this issue?  May have been introduced by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6578&quot; title=&quot;inodebit locks for remote entries.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6578&quot;&gt;&lt;del&gt;LU-6578&lt;/del&gt;&lt;/a&gt;.&lt;br/&gt;
Thanks.&lt;br/&gt;
Joe&lt;/p&gt;</comment>
                            <comment id="130431" author="jhammond" created="Wed, 14 Oct 2015 19:06:52 +0000"  >&lt;p&gt;In &lt;tt&gt;revalidate_statahead_dentry()&lt;/tt&gt; if &lt;tt&gt;md_revalidate_lock()&lt;/tt&gt; succeeds but the dentry is stale then the lock reference is leaked:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;                rc = md_revalidate_lock(ll_i2mdexp(dir), &amp;amp;it,
                                        ll_inode2fid(inode), &amp;amp;bits);
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (rc == 1) {
                        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; ((*dentryp)-&amp;gt;d_inode == NULL) {
                                struct dentry *alias;

                                alias = ll_splice_alias(inode, *dentryp);
                                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (IS_ERR(alias))
                                        GOTO(out, rc = PTR_ERR(alias));
                                *dentryp = alias;
                                /* statahead prepared &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; inode, transfer inode          
                                 * refcount from sa_entry to dentry */
                                entry-&amp;gt;se_inode = NULL;
                        } &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; ((*dentryp)-&amp;gt;d_inode != inode) {
                                &lt;span class=&quot;code-comment&quot;&gt;/* revalidate, but inode is recreated */&lt;/span&gt;
                                CERROR(&lt;span class=&quot;code-quote&quot;&gt;&quot;%s: stale dentry %.*s inode &quot;&lt;/span&gt;
                                       DFID&lt;span class=&quot;code-quote&quot;&gt;&quot;, statahead inode &quot;&lt;/span&gt;DFID&lt;span class=&quot;code-quote&quot;&gt;&quot;\n&quot;&lt;/span&gt;,
                                        ll_get_fsname((*dentryp)-&amp;gt;d_inode-&amp;gt;i_sb,
                                                      NULL, 0),
                                        (*dentryp)-&amp;gt;d_name.len,
                                        (*dentryp)-&amp;gt;d_name.name,
                                        PFID(ll_inode2fid((*dentryp)-&amp;gt;d_inode)),
                                        PFID(ll_inode2fid(inode)));
                                GOTO(out, rc = -ESTALE);
                        }

                        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; ((bits &amp;amp; MDS_INODELOCK_LOOKUP) &amp;amp;&amp;amp;
                            d_lustre_invalid(*dentryp))
                                d_lustre_revalidate(*dentryp);
                        ll_intent_release(&amp;amp;it);
                }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Note that I changed the &lt;tt&gt;CDEBUG()&lt;/tt&gt; to a &lt;tt&gt;CERROR()&lt;/tt&gt;.&lt;/p&gt;

&lt;p&gt;This is a rare case but it will be reached in a long enough racer run:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 3096.558710] LustreError: 2895:0:(statahead.c:1489:revalidate_statahead_dentry()) lustre: stale dentry 18 inode [0x200000406:0xf1c3:0x0], statahead inode [0x200000405:0xff98:0x0]
[ 3161.014297] LNet: Service thread pid 27768 was inactive for 62.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[ 3161.018003] Pid: 27768, comm: mdt01_011
[ 3161.018930] 
[ 3161.018931] Call Trace:
[ 3161.019930]  [&amp;lt;ffffffff81086a4c&amp;gt;] ? lock_timer_base+0x3c/0x70
[ 3161.021240]  [&amp;lt;ffffffff81553f43&amp;gt;] schedule_timeout+0x1b3/0x310
[ 3161.022560]  [&amp;lt;ffffffff81086b60&amp;gt;] ? process_timeout+0x0/0x10
[ 3161.024437]  [&amp;lt;ffffffffa0c701a0&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x250 [ptlrpc]
[ 3161.026814]  [&amp;lt;ffffffffa0c74db9&amp;gt;] ldlm_completion_ast+0x609/0x9b0 [ptlrpc]
[ 3161.028977]  [&amp;lt;ffffffff8155639b&amp;gt;] ? _spin_unlock+0x2b/0x40
[ 3161.030625]  [&amp;lt;ffffffff81061d90&amp;gt;] ? default_wake_function+0x0/0x20
[ 3161.032584]  [&amp;lt;ffffffffa0c74111&amp;gt;] ldlm_cli_enqueue_local+0x221/0x8c0 [ptlrpc]
[ 3161.034831]  [&amp;lt;ffffffffa0c747b0&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[ 3161.036390]  [&amp;lt;ffffffffa1322100&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[ 3161.037578]  [&amp;lt;ffffffffa132e65b&amp;gt;] mdt_object_local_lock+0x1bb/0xa80 [mdt]
[ 3161.038830]  [&amp;lt;ffffffffa1322100&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
[ 3161.040037]  [&amp;lt;ffffffffa0c747b0&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
[ 3161.041337]  [&amp;lt;ffffffffa0a80641&amp;gt;] ? lu_object_find_at+0xb1/0xe0 [obdclass]
[ 3161.042610]  [&amp;lt;ffffffffa132f28f&amp;gt;] mdt_object_lock_internal+0x5f/0x2d0 [mdt]
[ 3161.043916]  [&amp;lt;ffffffffa132f5c1&amp;gt;] mdt_object_lock+0x11/0x20 [mdt]
[ 3161.045059]  [&amp;lt;ffffffffa13473e1&amp;gt;] mdt_reint_unlink+0x831/0x10f0 [mdt]
[ 3161.046285]  [&amp;lt;ffffffffa0a9b880&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
[ 3161.047446]  [&amp;lt;ffffffffa1321245&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
[ 3161.048525]  [&amp;lt;ffffffffa133a77c&amp;gt;] ? mdt_root_squash+0x2c/0x3f0 [mdt]
[ 3161.049707]  [&amp;lt;ffffffffa133e8cd&amp;gt;] mdt_reint_rec+0x5d/0x200 [mdt]
[ 3161.050829]  [&amp;lt;ffffffffa1327c53&amp;gt;] mdt_reint_internal+0x633/0xa50 [mdt]
[ 3161.052053]  [&amp;lt;ffffffffa132850b&amp;gt;] mdt_reint+0x6b/0x120 [mdt]
[ 3161.053202]  [&amp;lt;ffffffffa0d0942f&amp;gt;] tgt_request_handle+0x8cf/0x1300 [ptlrpc]
[ 3161.054505]  [&amp;lt;ffffffffa0cb3b0a&amp;gt;] ptlrpc_main+0xdaa/0x18b0 [ptlrpc]
[ 3161.055721]  [&amp;lt;ffffffffa0cb2d60&amp;gt;] ? ptlrpc_main+0x0/0x18b0 [ptlrpc]
[ 3161.056879]  [&amp;lt;ffffffff8109e856&amp;gt;] kthread+0x96/0xa0
[ 3161.057789]  [&amp;lt;ffffffff8100c30a&amp;gt;] child_rip+0xa/0x20
[ 3161.058709]  [&amp;lt;ffffffff815562e0&amp;gt;] ? _spin_unlock_irq+0x30/0x40
[ 3161.059797]  [&amp;lt;ffffffff8100bb10&amp;gt;] ? restore_args+0x0/0x30
[ 3161.060803]  [&amp;lt;ffffffff8109e7c0&amp;gt;] ? kthread+0x0/0xa0
[ 3161.061726]  [&amp;lt;ffffffff8100c300&amp;gt;] ? child_rip+0x0/0x20
[ 3161.062681] 
...
[ 3399.014162] LustreError: 27768:0:(ldlm_request.c:106:ldlm_expired_completion_wait()) ### lock timed out (enqueued at 1444849018, 300s ago); not entering recovery in server code, just going back to sleep ns: mdt-lustre-MDT0000_UUID lock: ffff8800553ac0e8/0x2c4c133c3c59a8c lrc: 3/0,1 mode: --/EX res: [0x200000405:0xff98:0x0].0x0 bits 0x3 rrc: 3 type: IBT flags: 0x40210000000000 nid: local remote: 0x0 expref: -99 pid: 27768 timeout: 0 lvb_type: 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="130483" author="laisiyao" created="Thu, 15 Oct 2015 08:47:36 +0000"  >&lt;p&gt;One change in &lt;a href=&quot;http://review.whamcloud.com/15767&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15767&lt;/a&gt; is that statahead now getattr by fid, instead of name, but I don&apos;t understand how it may cause racer deadlock.&lt;/p&gt;

&lt;p&gt;Oleg, could you revert &lt;a href=&quot;http://review.whamcloud.com/15767&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15767&lt;/a&gt; and see whether this deadlock is gone? I tested master code in local test env, but couldn&apos;t reproduce. &lt;/p&gt;</comment>
                            <comment id="130492" author="jhammond" created="Thu, 15 Oct 2015 12:29:08 +0000"  >&lt;p&gt;Perhaps the changes in 15767 are exposing the latent bug described in my previous comment.&lt;/p&gt;</comment>
                            <comment id="130585" author="gerrit" created="Fri, 16 Oct 2015 03:34:04 +0000"  >&lt;p&gt;Lai Siyao (lai.siyao@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/16841&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16841&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7232&quot; title=&quot;racer deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7232&quot;&gt;&lt;del&gt;LU-7232&lt;/del&gt;&lt;/a&gt; statahead: lock leaks if statahead file recreated&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 796d0e2a9e63e6648cdba349e55c5a638f26d75b&lt;/p&gt;</comment>
                            <comment id="131811" author="gerrit" created="Wed, 28 Oct 2015 13:49:20 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/16841/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16841/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7232&quot; title=&quot;racer deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7232&quot;&gt;&lt;del&gt;LU-7232&lt;/del&gt;&lt;/a&gt; statahead: lock leaks if statahead file recreated&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 2c00faec12ea60b8955fe0e793b32ef25795ed42&lt;/p&gt;</comment>
                            <comment id="131843" author="jgmitter" created="Wed, 28 Oct 2015 15:24:15 +0000"  >&lt;p&gt;Landed for 2.8&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxp3j:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>