<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:04:34 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-180] Sometimes evicted clients never reconnect</title>
                <link>https://jira.whamcloud.com/browse/LU-180</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;On our benchmarking cluster we observe a very strange behavior of Lustre: sometimes clients that are evicted by an OST never reconnect. The consequence is that we have a lot of Defunct processes on the impacted compute nodes, so we have to reboot them.&lt;/p&gt;

&lt;p&gt;For instance on a compute node we can see that one OST connection is missing:&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;root@kay297 ~&amp;#93;&lt;/span&gt;# lfs df&lt;br/&gt;
UUID                   1K-blocks        Used   Available Use% Mounted on&lt;br/&gt;
scratch-MDT0000_UUID   983490512      887336   982603176   0% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;MDT:0&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0000_UUID  7691221300   547721748  7143497248   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:0&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0001_UUID  7691221300   548640356  7142579792   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:1&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0002_UUID  7691221300   555585984  7135634108   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:2&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0003_UUID  7691221300   551102404  7140118212   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:3&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0004_UUID  7691221300   569235872  7121985356   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:4&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0005_UUID  7691221300   565971400  7125210680   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:5&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0006_UUID  7691221300   551380176  7139839904   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:6&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0007_UUID  7691221300   552060560  7139160472   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:7&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0008_UUID  7691221300   540060736  7151160480   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:8&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0009_UUID  7691221300   542803928  7148417244   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:9&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST000a_UUID  7691221300   549910932  7141309212   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:10&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST000b_UUID  7691221300   553465732  7137754416   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:11&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST000c_UUID  7691221300   547134756  7144086380   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:12&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST000d_UUID  7691221300   542512828  7148708296   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:13&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST000e_UUID  7691221300   540940940  7150278116   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:14&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST000f_UUID  7691221300   552187304  7139031756   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:15&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0010_UUID  7691221300   553010540  7138207368   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:16&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0011_UUID  7691221300   549111608  7142109332   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:17&amp;#93;&lt;/span&gt;&lt;br/&gt;
OST0012             : inactive device&lt;br/&gt;
scratch-OST0013_UUID  7691221300   545678392  7145547784   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:19&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0014_UUID  7691221300   545673392  7145547784   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:20&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0015_UUID  7691221300   553029372  7138191732   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:21&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0016_UUID  7691221300   578557784  7112659292   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:22&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0017_UUID  7691221300   553574948  7137640080   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:23&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0018_UUID  7691221300   593382232  7097838936   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:24&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST0019_UUID  7691221300   550952100  7140232336   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:25&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST001a_UUID  7691221300   604897244  7086322904   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:26&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST001b_UUID  7691221300   545086976  7146133184   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:27&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST001c_UUID  7691221300   550491540  7140725472   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:28&amp;#93;&lt;/span&gt;&lt;br/&gt;
scratch-OST001d_UUID  7691221300   542008368  7149211712   7% /scratch_lustre&lt;span class=&quot;error&quot;&gt;&amp;#91;OST:29&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;filesystem summary:  215354196400 16076170152 199823591804   7% /scratch_lustre&lt;/p&gt;


&lt;p&gt;The phenomenon is erratic, it does not always impact the same clients or the same OSTs.&lt;/p&gt;

&lt;p&gt;I attach the syslogs of kay297 (client) and kay3 (OSS).&lt;/p&gt;</description>
                <environment></environment>
        <key id="10527">LU-180</key>
            <summary>Sometimes evicted clients never reconnect</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="niu">Niu Yawei</assignee>
                                    <reporter username="sebastien.buisson">Sebastien Buisson</reporter>
                        <labels>
                    </labels>
                <created>Wed, 30 Mar 2011 06:07:32 +0000</created>
                <updated>Tue, 19 Apr 2011 08:57:21 +0000</updated>
                            <resolved>Tue, 19 Apr 2011 08:57:21 +0000</resolved>
                                    <version>Lustre 2.0.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="11607" author="sebastien.buisson" created="Thu, 31 Mar 2011 02:19:33 +0000"  >&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;After less than 24 hours we have a new occurrence of this issue. The client node is kay310 (10.17.1.211@o2ib) and the OST is scratch-OST000b hosted by OSS kay2.&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;root@kay310 ~&amp;#93;&lt;/span&gt;# lctl dl&lt;br/&gt;
  0 UP mgc MGC10.17.0.2@o2ib 1d6fa677-53e6-5478-7b63-13c135edb2cd 5&lt;br/&gt;
  1 UP lov scratch-clilov-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 4&lt;br/&gt;
  2 UP lmv scratch-clilmv-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 4&lt;br/&gt;
  3 UP mdc scratch-MDT0000-mdc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
  4 UP osc scratch-OST0016-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
  5 UP osc scratch-OST0017-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
  6 UP osc scratch-OST001d-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
  7 UP osc scratch-OST0010-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
  8 UP osc scratch-OST0003-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
  9 UP osc scratch-OST0015-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 10 UP osc scratch-OST0005-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 11 UP osc scratch-OST0009-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 12 UP osc scratch-OST0007-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 13 UP osc scratch-OST0006-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 14 UP osc scratch-OST000d-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 15 UP osc scratch-OST0002-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 16 UP osc scratch-OST000e-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 17 UP osc scratch-OST0004-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 18 UP osc scratch-OST000f-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 19 UP osc scratch-OST0008-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 20 UP osc scratch-OST001b-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 21 IN osc scratch-OST000b-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 22 UP osc scratch-OST0019-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 23 UP osc scratch-OST0012-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 24 UP osc scratch-OST000a-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 25 UP osc scratch-OST0011-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 26 UP osc scratch-OST000c-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 27 UP osc scratch-OST0001-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 28 UP osc scratch-OST001c-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 29 UP osc scratch-OST0014-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 30 UP osc scratch-OST0018-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 31 UP osc scratch-OST001a-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 32 UP osc scratch-OST0000-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;br/&gt;
 33 UP osc scratch-OST0013-osc-ffff8803313ff000 0d84de33-3370-ded5-56b3-420997b67d2c 5&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;root@kay310 ~&amp;#93;&lt;/span&gt;# cat /proc/fs/lustre/osc/scratch-OST000b-osc-ffff8803313ff000/import &lt;br/&gt;
import:&lt;br/&gt;
    name: scratch-OST000b-osc-ffff8803313ff000&lt;br/&gt;
    target: scratch-OST000b_UUID&lt;br/&gt;
    state: EVICTED&lt;br/&gt;
    connect_flags: &lt;span class=&quot;error&quot;&gt;&amp;#91;write_grant, server_lock, version, request_portal, truncate_lock, max_byte_per_rpc, early_lock_cancel, adaptive_timeouts, lru_resize, alt_checksum_algorithm, version_recovery, full20&amp;#93;&lt;/span&gt;&lt;br/&gt;
    import_flags: &lt;span class=&quot;error&quot;&gt;&amp;#91;invalid, replayable, pingable&amp;#93;&lt;/span&gt;&lt;br/&gt;
    connection:&lt;br/&gt;
       failover_nids: &lt;span class=&quot;error&quot;&gt;&amp;#91;10.17.0.3@o2ib&amp;#93;&lt;/span&gt;&lt;br/&gt;
       current_connection: 10.17.0.3@o2ib&lt;br/&gt;
       connection_attempts: 2&lt;br/&gt;
       generation: 2&lt;br/&gt;
       in-progress_invalidations: 1&lt;br/&gt;
    rpcs:&lt;br/&gt;
       inflight: 0&lt;br/&gt;
       unregistering: 0&lt;br/&gt;
       timeouts: 0&lt;br/&gt;
       avg_waittime: 167693 usec&lt;br/&gt;
    service_estimates:&lt;br/&gt;
       services: 1 sec&lt;br/&gt;
       network: 1 sec&lt;br/&gt;
    transactions:&lt;br/&gt;
       last_replay: 0&lt;br/&gt;
       peer_committed: 4514305&lt;br/&gt;
       last_checked: 4514305&lt;br/&gt;
    read_data_averages:&lt;br/&gt;
       bytes_per_rpc: 1046287&lt;br/&gt;
       usec_per_rpc: 57030&lt;br/&gt;
       MB_per_sec: 18.34&lt;br/&gt;
    write_data_averages:&lt;br/&gt;
       bytes_per_rpc: 1046268&lt;br/&gt;
       usec_per_rpc: 585258&lt;br/&gt;
       MB_per_sec: 1.78&lt;/p&gt;



&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;buissons@kay2 ~&amp;#93;&lt;/span&gt;$ lctl dl&lt;br/&gt;
  0 UP mgc MGC10.17.0.2@o2ib b612ff68-0ab5-593c-d7ca-0fc4e5087a57 5&lt;br/&gt;
  1 UP ost OSS OSS_uuid 3&lt;br/&gt;
  2 UP obdfilter scratch-OST0006 scratch-OST0006_UUID 157&lt;br/&gt;
  3 UP obdfilter scratch-OST000b scratch-OST000b_UUID 153&lt;br/&gt;
  4 UP obdfilter scratch-OST0017 scratch-OST0017_UUID 157&lt;br/&gt;
  5 UP obdfilter scratch-OST0007 scratch-OST0007_UUID 157&lt;br/&gt;
  6 UP obdfilter scratch-OST0008 scratch-OST0008_UUID 157&lt;br/&gt;
  7 UP obdfilter scratch-OST001d scratch-OST001d_UUID 157&lt;br/&gt;
  8 UP obdfilter scratch-OST000c scratch-OST000c_UUID 157&lt;br/&gt;
  9 UP obdfilter scratch-OST0003 scratch-OST0003_UUID 157&lt;br/&gt;
 10 UP obdfilter scratch-OST0004 scratch-OST0004_UUID 157&lt;br/&gt;
 11 UP obdfilter scratch-OST0016 scratch-OST0016_UUID 157&lt;br/&gt;
 12 UP obdfilter scratch-OST0005 scratch-OST0005_UUID 155&lt;br/&gt;
 13 UP obdfilter scratch-OST0010 scratch-OST0010_UUID 157&lt;br/&gt;
 14 UP obdfilter scratch-OST0002 scratch-OST0002_UUID 157&lt;br/&gt;
 15 UP obdfilter scratch-OST0015 scratch-OST0015_UUID 157&lt;br/&gt;
 16 UP obdfilter scratch-OST000a scratch-OST000a_UUID 157&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;buissons@kay2 ~&amp;#93;&lt;/span&gt;$ cat /proc/fs/lustre/obdfilter/scratch-OST000b/exports/10.17.1.211&amp;#64;o2ib/uuid &lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;buissons@kay2 ~&amp;#93;&lt;/span&gt;$ cat /proc/fs/lustre/obdfilter/scratch-OST000b/exports/10.17.1.211&amp;#64;o2ib/stats&lt;br/&gt;
snapshot_time             1301562458.151546 secs.usecs&lt;br/&gt;
read_bytes                4109 samples &lt;span class=&quot;error&quot;&gt;&amp;#91;bytes&amp;#93;&lt;/span&gt; 4096 1048576 4299194368&lt;br/&gt;
write_bytes               4089 samples &lt;span class=&quot;error&quot;&gt;&amp;#91;bytes&amp;#93;&lt;/span&gt; 57344 1048576 4278190080&lt;br/&gt;
get_info                  41 samples &lt;span class=&quot;error&quot;&gt;&amp;#91;reqs&amp;#93;&lt;/span&gt;&lt;br/&gt;
set_info_async            3 samples &lt;span class=&quot;error&quot;&gt;&amp;#91;reqs&amp;#93;&lt;/span&gt;&lt;br/&gt;
disconnect                6 samples &lt;span class=&quot;error&quot;&gt;&amp;#91;reqs&amp;#93;&lt;/span&gt;&lt;br/&gt;
sync                      6 samples &lt;span class=&quot;error&quot;&gt;&amp;#91;reqs&amp;#93;&lt;/span&gt;&lt;br/&gt;
preprw                    8198 samples &lt;span class=&quot;error&quot;&gt;&amp;#91;reqs&amp;#93;&lt;/span&gt;&lt;br/&gt;
commitrw                  8198 samples &lt;span class=&quot;error&quot;&gt;&amp;#91;reqs&amp;#93;&lt;/span&gt;&lt;br/&gt;
ping                      24659 samples &lt;span class=&quot;error&quot;&gt;&amp;#91;reqs&amp;#93;&lt;/span&gt;&lt;/p&gt;



&lt;p&gt;This is very annoying because I tried to reactivate the import on the client, but with no success.&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;root@kay310 ~&amp;#93;&lt;/span&gt;# lctl --device 21 activate&lt;br/&gt;
error: activate: failed: Invalid argument&lt;/p&gt;

&lt;p&gt;The only option we have is to reboot the impacted Lustre clients, which is not acceptable on a benchmarking cluster.&lt;/p&gt;

&lt;p&gt;Sebastien.&lt;/p&gt;</comment>
                            <comment id="11629" author="pjones" created="Thu, 31 Mar 2011 04:29:49 +0000"  >&lt;p&gt;Niu&lt;/p&gt;

&lt;p&gt;Could you look into this one please?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="11630" author="sebastien.buisson" created="Thu, 31 Mar 2011 06:23:57 +0000"  >&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;This problem seems to be similar to bugzilla 21636. The initial description in this bugzilla is not really the same, but if you look at comment 82 you will see that the errors are exactly the same as what we are suffering from on our benchmarking cluster.&lt;/p&gt;

&lt;p&gt;HTH&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="11739" author="niu" created="Fri, 1 Apr 2011 00:17:53 +0000"  >&lt;p&gt;Looks the invalidate thread did never finish the import invalidation job, I suspect that there is some kind of deadlock.&lt;/p&gt;

&lt;p&gt;Hi, Sebastien&lt;/p&gt;

&lt;p&gt;Could you also collect all the threads&apos; stack trace of the abnormal client when this issue happened? (echo t &amp;gt; /proc/sysrq-trigger) I want to see where the invalidate thread was blocked.&lt;/p&gt;</comment>
                            <comment id="12998" author="niu" created="Mon, 18 Apr 2011 21:00:34 +0000"  >&lt;p&gt;This bug is probably caused by a deadlock in clio, which could cause the client being evicted, and because of the deadlock, the invalidation thread (ll_imp_inval) would be blocked on a semaphore, then the import can not be reactivated anymore.&lt;/p&gt;

&lt;p&gt;The deadlock issue is &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-122&quot; title=&quot;Revert bug 21122 since it causes deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-122&quot;&gt;&lt;del&gt;LU-122&lt;/del&gt;&lt;/a&gt;, and Jay is working on it.&lt;/p&gt;

&lt;p&gt;Hi, Sebastien, when you hit the issue next time, could you please check the stack trace to see if it&apos;s similar to the trace shown in the attached file? Thanks. &lt;/p&gt;
</comment>
                            <comment id="13005" author="pichong" created="Tue, 19 Apr 2011 05:38:10 +0000"  >&lt;p&gt;There is a new occurrence of the bug.&lt;/p&gt;

&lt;p&gt;In attachment is the trace log of the client&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;at the time the client was evicted by the OST&lt;/li&gt;
	&lt;li&gt;with the threads&apos; stack trace generated by &apos;sysrq-trigger&apos;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Tell me if you need other information from the client node.&lt;/p&gt;</comment>
                            <comment id="13015" author="pichong" created="Tue, 19 Apr 2011 07:23:23 +0000"  >&lt;p&gt;The stack trace of the new occurrence looks similar to the one in deadlock-trace.log.&lt;/p&gt;

&lt;p&gt;We are going to install the fix described by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-122&quot; title=&quot;Revert bug 21122 since it causes deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-122&quot;&gt;&lt;del&gt;LU-122&lt;/del&gt;&lt;/a&gt; and see if it reproduces.&lt;/p&gt;</comment>
                            <comment id="13022" author="niu" created="Tue, 19 Apr 2011 08:57:21 +0000"  >&lt;p&gt;Hi, Gregoire&lt;/p&gt;

&lt;p&gt;Yes, I checked the log and I believe it&apos;s the deadlock problem in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-122&quot; title=&quot;Revert bug 21122 since it causes deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-122&quot;&gt;&lt;del&gt;LU-122&lt;/del&gt;&lt;/a&gt;, mark it as duplicated.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="10176" name="client.log.gz" size="43895" author="pichong" created="Tue, 19 Apr 2011 05:38:10 +0000"/>
                            <attachment id="10174" name="deadlock-trace.log" size="8971" author="niu" created="Mon, 18 Apr 2011 21:00:34 +0000"/>
                            <attachment id="10161" name="kay297" size="39631" author="sebastien.buisson" created="Wed, 30 Mar 2011 06:07:32 +0000"/>
                            <attachment id="10162" name="kay3" size="22361" author="sebastien.buisson" created="Wed, 30 Mar 2011 06:07:32 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvslz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>8539</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>