<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:18:51 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1690] Permanent eviction scenario starting with Lustre 2.1.1</title>
                <link>https://jira.whamcloud.com/browse/LU-1690</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;On our 3 big customer clusters running Lustre 2.1, we have quite frequent situations where Lustre Clients are evicted by OSSs servers and never reconnect. As a consequence, those nodes have to be rebooted.&lt;/p&gt;

&lt;p&gt;When the scenario occurs, usually some parallel application is running on the affected Client node, and some of its threads are found stuck since quite a long time in a non-interruptible state, along with Lustre threads, like following :&lt;br/&gt;
==========================================&lt;br/&gt;
crash&amp;gt; ps | grep UN&lt;br/&gt;
  17353      2   2  ffff880fd60560c0  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_13&amp;#93;&lt;/span&gt;&lt;br/&gt;
  17355      2   6  ffff880f287a1100  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_15&amp;#93;&lt;/span&gt;&lt;br/&gt;
  17356      2  11  ffff880ce88cf850  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_16&amp;#93;&lt;/span&gt;&lt;br/&gt;
  17357      2  31  ffff880e434b1790  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_17&amp;#93;&lt;/span&gt;&lt;br/&gt;
  17359      2   1  ffff880e434b1040  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_18&amp;#93;&lt;/span&gt;&lt;br/&gt;
  17366      2   7  ffff880be74b2080  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_20&amp;#93;&lt;/span&gt;&lt;br/&gt;
  17368      2  18  ffff880c92ed2080  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_22&amp;#93;&lt;/span&gt;&lt;br/&gt;
  17369      2   6  ffff8802b3311850  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_23&amp;#93;&lt;/span&gt;&lt;br/&gt;
  17370      2  11  ffff88035fd7c850  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_24&amp;#93;&lt;/span&gt;&lt;br/&gt;
  17372      2   0  ffff88087d691080  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_25&amp;#93;&lt;/span&gt;&lt;br/&gt;
  33850      2  31  ffff881055105790  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_02&amp;#93;&lt;/span&gt;&lt;br/&gt;
  33851      2  15  ffff88047c9db0c0  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_03&amp;#93;&lt;/span&gt;&lt;br/&gt;
  33852      2  30  ffff88047d788040  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_04&amp;#93;&lt;/span&gt;&lt;br/&gt;
  33853      2   3  ffff88105506a7d0  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_05&amp;#93;&lt;/span&gt;&lt;br/&gt;
  33854      2   6  ffff8802c8d38810  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_06&amp;#93;&lt;/span&gt;&lt;br/&gt;
  33856      2  30  ffff8803f7682850  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_08&amp;#93;&lt;/span&gt;&lt;br/&gt;
  38029      2  26  ffff880bdf7e87d0  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_10&amp;#93;&lt;/span&gt;&lt;br/&gt;
  67452  67447  16  ffff88087ae3b040  UN   1.9 5984848 1325628  %%U657_malheur&lt;br/&gt;
  67453  67447   8  ffff8805232cc850  UN   1.9 5977308 1319328  %%U657_malheur&lt;br/&gt;
  67454  67447  24  ffff88087c67e080  UN   1.9 5981192 1322480  %%U657_malheur&lt;br/&gt;
  67455  67447   4  ffff8806734140c0  UN   1.9 5992684 1331788  %%U657_malheur&lt;br/&gt;
  67456  67447  20  ffff88087d8c9850  UN   1.9 6001336 1338540  %%U657_malheur&lt;br/&gt;
  67457  67447  12  ffff88057b7e3790  UN   2.0 6026500 1360764  %%U657_malheur&lt;br/&gt;
  67458  67447  28  ffff88087d8c9100  UN   2.0 6053684 1380308  %%U657_malheur&lt;br/&gt;
  67459  67447   1  ffff88062ba0c7d0  UN   2.0 6053928 1380104  %%U657_malheur&lt;br/&gt;
  67460  67447  17  ffff88087cc0e850  UN   2.0 6026372 1360408  %%U657_malheur&lt;br/&gt;
  67461  67447   9  ffff8806a4e8d790  UN   1.9 6001772 1338920  %%U657_malheur&lt;br/&gt;
  67462  67447  25  ffff8806a4e8d040  UN   1.9 5993776 1332416  %%U657_malheur&lt;br/&gt;
  67463  67447   5  ffff8804ad4bf7d0  UN   1.9 5981100 1322596  %%U657_malheur&lt;br/&gt;
  67464  67447  21  ffff8804ad4bf080  UN   2.0 6035776 1367864  %%U657_malheur&lt;br/&gt;
  67465  67447  13  ffff8807b3b89810  UN   2.0 6041500 1371540  %%U657_malheur&lt;br/&gt;
  69081      2   7  ffff8807f7a26790  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_11&amp;#93;&lt;/span&gt;&lt;br/&gt;
  69082      2   4  ffff88086c1287d0  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_12&amp;#93;&lt;/span&gt;&lt;br/&gt;
  80827      2   2  ffff880c7bd9e850  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpcd_14&amp;#93;&lt;/span&gt;&lt;br/&gt;
  80864      2   4  ffff88107cd4a040  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_00&amp;#93;&lt;/span&gt;&lt;br/&gt;
  80865      2   3  ffff88107d2a5080  UN   0.0       0      0  &lt;span class=&quot;error&quot;&gt;&amp;#91;ldlm_bl_01&amp;#93;&lt;/span&gt;&lt;br/&gt;
==========================================&lt;/p&gt;

&lt;p&gt;and these threads stacks look like :&lt;br/&gt;
====================================&lt;br/&gt;
PID: 17353  TASK: ffff880fd60560c0  CPU: 2   COMMAND: &quot;ldlm_bl_13&quot;&lt;br/&gt;
 #0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881054bd7bf0&amp;#93;&lt;/span&gt; schedule at ffffffff8147dddc&lt;br/&gt;
 #1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881054bd7cb8&amp;#93;&lt;/span&gt; __mutex_lock_slowpath at ffffffff8147f33e&lt;br/&gt;
 #2 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881054bd7d38&amp;#93;&lt;/span&gt; mutex_lock at ffffffff8147f1cb&lt;br/&gt;
 #3 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881054bd7d58&amp;#93;&lt;/span&gt; cl_lock_mutex_get at ffffffffa04a4a68 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #4 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881054bd7d98&amp;#93;&lt;/span&gt; osc_ldlm_blocking_ast at ffffffffa0756e19 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #5 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881054bd7e28&amp;#93;&lt;/span&gt; ldlm_handle_bl_callback at ffffffffa05636fd &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #6 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881054bd7e88&amp;#93;&lt;/span&gt; ldlm_bl_thread_main at ffffffffa0563b51 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #7 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881054bd7f48&amp;#93;&lt;/span&gt; kernel_thread at ffffffff810041aa&lt;/p&gt;

&lt;p&gt;or&lt;/p&gt;

&lt;p&gt;PID: 67453  TASK: ffff8805232cc850  CPU: 8   COMMAND: &quot;%%U657_malheur&quot;&lt;br/&gt;
 #0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fff988&amp;#93;&lt;/span&gt; schedule at ffffffff8147dddc&lt;br/&gt;
 #1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffa50&amp;#93;&lt;/span&gt; __mutex_lock_slowpath at ffffffff8147f33e&lt;br/&gt;
 #2 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffad0&amp;#93;&lt;/span&gt; mutex_lock at ffffffff8147f1cb&lt;br/&gt;
 #3 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffaf0&amp;#93;&lt;/span&gt; cl_lock_mutex_get at ffffffffa04a4a68 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #4 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffb30&amp;#93;&lt;/span&gt; cl_lock_hold_mutex at ffffffffa04a6dfa &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #5 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffbf0&amp;#93;&lt;/span&gt; cl_lock_request at ffffffffa04a88ae &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #6 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffc80&amp;#93;&lt;/span&gt; cl_glimpse_lock at ffffffffa084d170 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #7 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffd10&amp;#93;&lt;/span&gt; cl_glimpse_size at ffffffffa084d9d4 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #8 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffd90&amp;#93;&lt;/span&gt; ll_inode_revalidate_it at ffffffffa08032af &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #9 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffdf0&amp;#93;&lt;/span&gt; ll_getattr_it at ffffffffa0803479 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
#10 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffe20&amp;#93;&lt;/span&gt; ll_getattr at ffffffffa08035d7 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
#11 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffe60&amp;#93;&lt;/span&gt; vfs_getattr at ffffffff81163041&lt;br/&gt;
#12 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffea0&amp;#93;&lt;/span&gt; vfs_fstatat at ffffffff811630d0&lt;br/&gt;
#13 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffee0&amp;#93;&lt;/span&gt; vfs_stat at ffffffff8116321b&lt;br/&gt;
#14 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683fffef0&amp;#93;&lt;/span&gt; sys_newstat at ffffffff81163244&lt;br/&gt;
#15 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880683ffff80&amp;#93;&lt;/span&gt; system_call_fastpath at ffffffff81003172&lt;br/&gt;
    RIP: 0000003b7bcd82e5  RSP: 00007fffb146b860  RFLAGS: 00000246&lt;br/&gt;
    RAX: 0000000000000004  RBX: ffffffff81003172  RCX: fffffffffffffffa&lt;br/&gt;
    RDX: 00007fffb146a258  RSI: 00007fffb146a258  RDI: 000000001324d0d8&lt;br/&gt;
    RBP: 0000000016800060   R8: 0000000000000077   R9: 00007fffb146a420&lt;br/&gt;
    R10: 0000000003bfa080  R11: 0000000000000246  R12: 00000000131e4c50&lt;br/&gt;
    R13: 0000000000000077  R14: ffffffff8104d9ee  R15: ffff880683ffff78&lt;br/&gt;
    ORIG_RAX: 0000000000000004  CS: 0033  SS: 002b&lt;br/&gt;
====================================&lt;/p&gt;


&lt;p&gt;Usually there is also another thread, not among the non-interruptible ones, with the following stack :&lt;br/&gt;
====================================&lt;br/&gt;
crash&amp;gt; ps | grep %%U657_malheur | grep -v UN&lt;br/&gt;
  67451  67660   0  ffff880673414810  IN   1.9 6012000 1347004  %%U657_malheur&lt;br/&gt;
crash&amp;gt; bt ffff880673414810&lt;br/&gt;
PID: 67451  TASK: ffff880673414810  CPU: 0   COMMAND: &quot;%%U657_malheur&quot;&lt;br/&gt;
 #0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93548&amp;#93;&lt;/span&gt; schedule at ffffffff8147dddc&lt;br/&gt;
 #1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93610&amp;#93;&lt;/span&gt; cfs_waitq_wait at ffffffffa03be75e &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #2 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93620&amp;#93;&lt;/span&gt; cl_sync_io_wait at ffffffffa04ac18f &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #3 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de936f0&amp;#93;&lt;/span&gt; cl_io_submit_sync at ffffffffa04ac387 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #4 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93750&amp;#93;&lt;/span&gt; cl_lock_page_out at ffffffffa04a7c3f &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #5 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93800&amp;#93;&lt;/span&gt; osc_lock_flush at ffffffffa075607f &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #6 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93850&amp;#93;&lt;/span&gt; osc_lock_cancel at ffffffffa075611a &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #7 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de938d0&amp;#93;&lt;/span&gt; cl_lock_cancel0 at ffffffffa04a2a6d &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #8 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93920&amp;#93;&lt;/span&gt; cl_lock_hold_release at ffffffffa04a40b5 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #9 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93960&amp;#93;&lt;/span&gt; cl_lock_unhold at ffffffffa04a5e70 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
#10 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de939b0&amp;#93;&lt;/span&gt; lov_sublock_release at ffffffffa07b3b16 &lt;span class=&quot;error&quot;&gt;&amp;#91;lov&amp;#93;&lt;/span&gt;&lt;br/&gt;
#11 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93a30&amp;#93;&lt;/span&gt; lov_lock_enqueue at ffffffffa07b65b8 &lt;span class=&quot;error&quot;&gt;&amp;#91;lov&amp;#93;&lt;/span&gt;&lt;br/&gt;
#12 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93b00&amp;#93;&lt;/span&gt; cl_enqueue_try at ffffffffa04a6a4b &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
#13 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93b80&amp;#93;&lt;/span&gt; cl_enqueue_locked at ffffffffa04a8550 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
#14 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93bf0&amp;#93;&lt;/span&gt; cl_lock_request at ffffffffa04a88ce &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
#15 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93c80&amp;#93;&lt;/span&gt; cl_glimpse_lock at ffffffffa084d170 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
#16 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93d10&amp;#93;&lt;/span&gt; cl_glimpse_size at ffffffffa084d9d4 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
#17 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93d90&amp;#93;&lt;/span&gt; ll_inode_revalidate_it at ffffffffa08032af &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
#18 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93df0&amp;#93;&lt;/span&gt; ll_getattr_it at ffffffffa0803479 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
#19 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93e20&amp;#93;&lt;/span&gt; ll_getattr at ffffffffa08035d7 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
#20 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93e60&amp;#93;&lt;/span&gt; vfs_getattr at ffffffff81163041&lt;br/&gt;
#21 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93ea0&amp;#93;&lt;/span&gt; vfs_fstatat at ffffffff811630d0&lt;br/&gt;
#22 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93ee0&amp;#93;&lt;/span&gt; vfs_stat at ffffffff8116321b&lt;br/&gt;
#23 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93ef0&amp;#93;&lt;/span&gt; sys_newstat at ffffffff81163244&lt;br/&gt;
#24 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88064de93f80&amp;#93;&lt;/span&gt; system_call_fastpath at ffffffff81003172&lt;br/&gt;
    RIP: 0000003b7bcd82e5  RSP: 00007fffcb1cc638  RFLAGS: 00000202&lt;br/&gt;
    RAX: 0000000000000004  RBX: ffffffff81003172  RCX: 0000003b7bccd177&lt;br/&gt;
    RDX: 00007fffcb1cc8d8  RSI: 00007fffcb1cc8d8  RDI: 0000000014583ba8&lt;br/&gt;
    RBP: 0000000016800060   R8: 0000000000000077   R9: 00007fffcb1ccaa0&lt;br/&gt;
    R10: 0000000003bfa080  R11: 0000000000000246  R12: 000000001426cb00&lt;br/&gt;
    R13: 0000000000000077  R14: ffffffff8104d9ee  R15: ffff88064de93f78&lt;br/&gt;
    ORIG_RAX: 0000000000000004  CS: 0033  SS: 002b&lt;br/&gt;
====================================&lt;/p&gt;

&lt;p&gt;This last thread is the current &quot;owner&quot; of the cl_lock struct(s), causing the other threads to be hung.  The last associated logs are :&lt;br/&gt;
====================================================&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 15:50:01 2012&lt;/p&gt;

&lt;p&gt;LustreError: 67467:0:(file.c:2186:ll_inode_revalidate_fini()) failure -116 inode 144150960170616438&lt;br/&gt;
LustreError: 67467:0:(file.c:2186:ll_inode_revalidate_fini()) Skipped 259 previous similar messages&lt;br/&gt;
Lustre: DEBUG MARKER: Mon Jun 18 15:55:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 16:00:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 16:05:01 2012&lt;/p&gt;

&lt;p&gt;LustreError: 67451:0:(file.c:2186:ll_inode_revalidate_fini()) failure -116 inode 144150960170616931&lt;br/&gt;
LustreError: 67451:0:(file.c:2186:ll_inode_revalidate_fini()) Skipped 187 previous similar messages&lt;br/&gt;
Lustre: DEBUG MARKER: Mon Jun 18 16:10:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 16:15:01 2012&lt;/p&gt;

&lt;p&gt;LustreError: 67467:0:(file.c:2186:ll_inode_revalidate_fini()) failure -116 inode 144150960170617447&lt;br/&gt;
LustreError: 67467:0:(file.c:2186:ll_inode_revalidate_fini()) Skipped 143 previous similar messages&lt;br/&gt;
Lustre: DEBUG MARKER: Mon Jun 18 16:20:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 16:25:01 2012&lt;/p&gt;

&lt;p&gt;........&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 17:10:01 2012&lt;/p&gt;

&lt;p&gt;LustreError: 67467:0:(file.c:2186:ll_inode_revalidate_fini()) failure -116 inode 144150960170620220&lt;br/&gt;
LustreError: 67467:0:(file.c:2186:ll_inode_revalidate_fini()) Skipped 51 previous similar messages&lt;br/&gt;
Lustre: DEBUG MARKER: Mon Jun 18 17:15:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 17:20:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 17:25:01 2012&lt;/p&gt;

&lt;p&gt;LustreError: 67451:0:(cl_io.c:1700:cl_sync_io_wait()) SYNC IO failed with error: -110, try to cancel 36 remaining pages&lt;br/&gt;
LustreError: 67451:0:(cl_io.c:965:cl_io_cancel()) Canceling ongoing page trasmission&lt;br/&gt;
Lustre: DEBUG MARKER: Mon Jun 18 17:30:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 17:35:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 17:40:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 17:45:01 2012&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: Mon Jun 18 17:50:01 2012&lt;/p&gt;

&lt;p&gt;================================================================&lt;/p&gt;

&lt;p&gt;This thread is awaiting for some pages still missing their flush status from the server.&lt;/p&gt;


&lt;p&gt;Unfortunately, no matching logs have been found on the server side.&lt;/p&gt;


&lt;p&gt;TIA,&lt;br/&gt;
Sebastien.&lt;/p&gt;</description>
                <environment></environment>
        <key id="15342">LU-1690</key>
            <summary>Permanent eviction scenario starting with Lustre 2.1.1</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="jay">Jinshan Xiong</assignee>
                                    <reporter username="sebastien.buisson">Sebastien Buisson</reporter>
                        <labels>
                    </labels>
                <created>Tue, 31 Jul 2012 09:53:36 +0000</created>
                <updated>Thu, 8 Feb 2018 18:29:52 +0000</updated>
                            <resolved>Thu, 8 Feb 2018 18:29:52 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="42497" author="jay" created="Tue, 31 Jul 2012 11:21:39 +0000"  >&lt;p&gt;this looks similar to the problem &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-874&quot; title=&quot;Client eviction on lock callback timeout &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-874&quot;&gt;&lt;del&gt;LU-874&lt;/del&gt;&lt;/a&gt; where the client was evicted while it&apos;s canceling the lock. Do you have these commits in your build:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;1d9f8f9 LU-874 ldlm: Fix ldlm_bl_* thread creation
0b9d039 LU-874 osc: prioritize writeback pages
7596538 LU-874 ldlm: prioritize LDLM_CANCEL requests
73535a0 LU-874 ptlrpc: handle in-flight hqreq correctly
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Also, the log on the server side is pretty important, can you please attach it here if you happen to collect one?&lt;/p&gt;</comment>
                            <comment id="42506" author="pjones" created="Tue, 31 Jul 2012 13:55:53 +0000"  >&lt;p&gt;Jinshan&lt;/p&gt;

&lt;p&gt;IIRC these sites are running 2.1.2 which contains the fixes for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-874&quot; title=&quot;Client eviction on lock callback timeout &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-874&quot;&gt;&lt;del&gt;LU-874&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="42507" author="jay" created="Tue, 31 Jul 2012 14:14:35 +0000"  >&lt;p&gt;In that case, firstly we need the log from OST side to see why the write back is timed out.&lt;/p&gt;

&lt;p&gt;Do you see this message:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LustreError: 67451:0:(cl_io.c:1700:cl_sync_io_wait()) SYNC IO failed with error: -110, try to cancel 36 remaining pages
LustreError: 67451:0:(cl_io.c:965:cl_io_cancel()) Canceling ongoing page trasmission
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;happened often on the client side? This clearly shows the writeback was not finished in 10 minutes.&lt;/p&gt;</comment>
                            <comment id="42537" author="sebastien.buisson" created="Wed, 1 Aug 2012 03:20:08 +0000"  >&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;These sites were running Lustre 2.1.2, but due to some instabilities, they reverted back to 2.1.1. However, the Lustre 2.1.1 version they are running now has one patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-874&quot; title=&quot;Client eviction on lock callback timeout &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-874&quot;&gt;&lt;del&gt;LU-874&lt;/del&gt;&lt;/a&gt; (only one), this one:&lt;br/&gt;
1d9f8f9 &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-874&quot; title=&quot;Client eviction on lock callback timeout &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-874&quot;&gt;&lt;del&gt;LU-874&lt;/del&gt;&lt;/a&gt; ldlm: Fix ldlm_bl_* thread creation&lt;/p&gt;

&lt;p&gt;I guess missing 3 out of 4 patches from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-874&quot; title=&quot;Client eviction on lock callback timeout &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-874&quot;&gt;&lt;del&gt;LU-874&lt;/del&gt;&lt;/a&gt; is not ideal...&lt;/p&gt;

&lt;p&gt;If it is still useful, I can ask for the logs on the server side. What do you need? OSS? MDS?&lt;/p&gt;

&lt;p&gt;Cheers,&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="42549" author="jay" created="Wed, 1 Aug 2012 12:17:36 +0000"  >&lt;p&gt;Hi Sebastien, LLNL was experiencing a similar issue and you need all of them to fix the problem. Please apply the other 3 and try again. I need the log from OSS side if you can still see this issue. Thanks.&lt;/p&gt;</comment>
                            <comment id="220470" author="jay" created="Thu, 8 Feb 2018 18:29:52 +0000"  >&lt;p&gt;close old tickets&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Wed, 1 Aug 2012 09:53:36 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv3gf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4059</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 31 Jul 2012 09:53:36 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>