<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:49:40 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5233] 2.6 DNE stress testing: (lod_object.c:930:lod_declare_attr_set()) ASSERTION( lo-&gt;ldo_stripe ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-5233</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;On the same system as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5204&quot; title=&quot;2.6 DNE stress testing: EINVAL when attempting to delete file&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5204&quot;&gt;&lt;del&gt;LU-5204&lt;/del&gt;&lt;/a&gt; (with OST38/0026 still not reachable from MDS1/MDT0), we hit this LBUG on MDS1 during stress testing:&lt;/p&gt;

&lt;p&gt;&amp;lt;0&amp;gt;LustreError: 26714:0:(lod_object.c:930:lod_declare_attr_set()) ASSERTION( lo-&amp;gt;ldo_stripe ) failed:&lt;br/&gt;
&amp;lt;0&amp;gt;LustreError: 26714:0:(lod_object.c:930:lod_declare_attr_set()) LBUG&lt;br/&gt;
&amp;lt;4&amp;gt;Pid: 26714, comm: mdt02_089&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;br/&gt;
&amp;lt;4&amp;gt;Call Trace:&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c55895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c55e97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa15d70e0&amp;gt;&amp;#93;&lt;/span&gt; lod_declare_attr_set+0x600/0x660 &lt;span class=&quot;error&quot;&gt;&amp;#91;lod&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa16338b8&amp;gt;&amp;#93;&lt;/span&gt; mdd_declare_object_initialize+0xa8/0x290 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa1635018&amp;gt;&amp;#93;&lt;/span&gt; mdd_create+0xb88/0x1870 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa1506217&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_create+0xcf7/0xed0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa1500a81&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_rec+0x41/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa14e5e93&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint_internal+0x4c3/0x7c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa14e671b&amp;gt;&amp;#93;&lt;/span&gt; mdt_reint+0x6b/0x120 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa103a2ac&amp;gt;&amp;#93;&lt;/span&gt; tgt_request_handle+0x23c/0xac0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0fe9d1a&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xd1a/0x1980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0fe9000&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aee6&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ae50&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;


&lt;p&gt;Additionally, we had the following stuck thread:&lt;br/&gt;
&amp;lt;3&amp;gt;INFO: task mdt01_020:26426 blocked for more than 120 seconds.&lt;br/&gt;
&amp;lt;3&amp;gt;      Not tainted 2.6.32-431.5.1.el6.x86_64 #1&lt;br/&gt;
&amp;lt;3&amp;gt;&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.&lt;br/&gt;
&amp;lt;6&amp;gt;mdt01_020     D 000000000000000a     0 26426      2 0x00000000&lt;br/&gt;
&amp;lt;4&amp;gt; ffff880ffa4d7af0 0000000000000046 0000000000000000 ffffffffa0c6bd75&lt;br/&gt;
&amp;lt;4&amp;gt; 0000000100000000 ffffc9003aa25030 0000000000000246 0000000000000246&lt;br/&gt;
&amp;lt;4&amp;gt; ffff88100aaae638 ffff880ffa4d7fd8 000000000000fbc8 ffff88100aaae638&lt;br/&gt;
&amp;lt;4&amp;gt;Call Trace:&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c6bd75&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_hash_bd_lookup_intent+0x65/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d225db&amp;gt;&amp;#93;&lt;/span&gt; lu_object_find_at+0xab/0x350 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81065df0&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d22896&amp;gt;&amp;#93;&lt;/span&gt; lu_object_find+0x16/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa14e2ea6&amp;gt;&amp;#93;&lt;/span&gt; mdt_object_find+0x56/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa14e4d2b&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_policy+0x75b/0xca0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f8e899&amp;gt;&amp;#93;&lt;/span&gt; ldlm_lock_enqueue+0x369/0x930 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0fb7d8f&amp;gt;&amp;#93;&lt;/span&gt; ldlm_handle_enqueue0+0x4ef/0x10b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa1039f02&amp;gt;&amp;#93;&lt;/span&gt; tgt_enqueue+0x62/0x1d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa103a2ac&amp;gt;&amp;#93;&lt;/span&gt; tgt_request_handle+0x23c/0xac0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0fe9d1a&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xd1a/0x1980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0fe9000&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aee6&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ae50&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;

&lt;p&gt;This message was repeated for some time before the LBUG.  This thread is - in all of these instances - stuck in a rather odd spot in cfs_hash_bd_lookup_intent:&lt;br/&gt;
        match = intent_add ? NULL : hnode;&lt;br/&gt;
        hlist_for_each(ehnode, hhead) {&lt;br/&gt;
                if (!cfs_hash_keycmp(hs, key, ehnode))&lt;br/&gt;
                        continue;&lt;/p&gt;

&lt;p&gt;Specifically, it reports as being stuck on the cfs_hash_keycmp line.  It&apos;s not clear to me how a thread could get stuck there.  I may be missing some operation it&apos;s doing as part of that.&lt;/p&gt;

&lt;p&gt;I&apos;ll make the dump available shortly.&lt;/p&gt;</description>
                <environment></environment>
        <key id="25227">LU-5233</key>
            <summary>2.6 DNE stress testing: (lod_object.c:930:lod_declare_attr_set()) ASSERTION( lo-&gt;ldo_stripe ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="di.wang">Di Wang</assignee>
                                    <reporter username="paf">Patrick Farrell</reporter>
                        <labels>
                            <label>HB</label>
                            <label>dne2</label>
                    </labels>
                <created>Thu, 19 Jun 2014 18:26:18 +0000</created>
                <updated>Thu, 26 Jun 2014 12:56:21 +0000</updated>
                            <resolved>Thu, 26 Jun 2014 12:56:21 +0000</resolved>
                                                    <fixVersion>Lustre 2.6.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="87079" author="paf" created="Thu, 19 Jun 2014 19:35:56 +0000"  >&lt;p&gt;MDS dump will be here in &amp;lt; 10 minutes:&lt;br/&gt;
ftp.cray.com&lt;br/&gt;
u: anonymous&lt;br/&gt;
p: anonymous&lt;/p&gt;

&lt;p&gt;Then:&lt;br/&gt;
cd outbound/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5233&quot; title=&quot;2.6 DNE stress testing: (lod_object.c:930:lod_declare_attr_set()) ASSERTION( lo-&amp;gt;ldo_stripe ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5233&quot;&gt;&lt;del&gt;LU-5233&lt;/del&gt;&lt;/a&gt;/&lt;br/&gt;
And then the file is:&lt;br/&gt;
mds001_mdt000_LU5233.tar.gz&lt;/p&gt;</comment>
                            <comment id="87080" author="paf" created="Thu, 19 Jun 2014 19:40:05 +0000"  >&lt;p&gt;There was also a client which was stuck waiting on a reply from MDS001/MDT000 before it crashed &lt;span class=&quot;error&quot;&gt;&amp;#91;Obviously, there were many time outs after it crashed, but before that.&amp;#93;&lt;/span&gt;, and the times match roughly with those for the stuck thread.  The stuck thread is probably a separate issue from the LBUG, but I don&apos;t want to separate them until we&apos;re further along.&lt;/p&gt;

&lt;p&gt;Here&apos;s the client bug information:&lt;br/&gt;
At 23:33:48, MDS0 died with an LBUG. (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5233&quot; title=&quot;2.6 DNE stress testing: (lod_object.c:930:lod_declare_attr_set()) ASSERTION( lo-&amp;gt;ldo_stripe ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5233&quot;&gt;&lt;del&gt;LU-5233&lt;/del&gt;&lt;/a&gt;)&lt;/p&gt;

&lt;p&gt;One of the client nodes got stuck before that - This is a thread refusing to exit because it&apos;s stuck in Lustre (Many other client threads were also stuck behind this one for the MDC rpc lock in mdc_close):&lt;br/&gt;
console-20140618:2014-06-18T23:07:16.160830-05:00 c0-0c1s4n2 &amp;lt;node_health:5.1&amp;gt; APID:1236942 (Application_Exited_Check) WARNING: Stack trace for process 13769:&lt;br/&gt;
console-20140618:2014-06-18T23:07:16.261778-05:00 c0-0c1s4n2 &amp;lt;node_health:5.1&amp;gt; APID:1236942 (Application_Exited_Check) STACK: &lt;br/&gt;
ptlrpc_set_wait+0x2e5/0x8c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;; &lt;br/&gt;
ptlrpc_queue_wait+0x8b/0x230 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;; &lt;br/&gt;
mdc_close+0x1ed/0xa50 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdc&amp;#93;&lt;/span&gt;; &lt;br/&gt;
lmv_close+0x242/0x5b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lmv&amp;#93;&lt;/span&gt;; &lt;br/&gt;
ll_close_inode_openhandle+0x2fa/0x10a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;; &lt;br/&gt;
ll_md_real_close+0xb0/0x210 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;; &lt;br/&gt;
ll_file_release+0x68c/0xb60 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;; &lt;br/&gt;
fput+0xe2/0x200; &lt;br/&gt;
filp_close+0x63/0x90; &lt;br/&gt;
put_files_struct+0x84/0xe0; &lt;br/&gt;
exit_files+0x53/0x70; &lt;br/&gt;
do_exit+0x1ec/0x990; &lt;br/&gt;
do_group_exit+0x4c/0xc0; &lt;br/&gt;
get_signal_to_deliver+0x243/0x490; &lt;br/&gt;
do_notify_resume+0xe0/0x7f0; &lt;br/&gt;
int_signal+0x12/0x17; &lt;br/&gt;
0x20061a87; &lt;br/&gt;
0xffffffffffffffff;&lt;/p&gt;

&lt;p&gt;The client is waiting for a ptlrpc reply. I strongly suspect this corresponds to the stuck thread messages on the MDS.&lt;br/&gt;
Unfortunately, by the time the node was dumped, the client had given up waiting and all of the tasks have exited (and the dk log is empty).  So there&apos;s no way to confirm from the client side.&lt;/p&gt;

&lt;p&gt;The first stuck thread messages on the MDS come here:&lt;/p&gt;

&lt;p&gt;Jun 18 23:16:36 galaxy-esf-mds001 kernel: INFO: task mdt01_020:26426 blocked for more than 120 seconds.&lt;br/&gt;
&amp;lt;4&amp;gt;Call Trace:&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c6bd75&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_hash_bd_lookup_intent+0x65/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d21fc4&amp;gt;&amp;#93;&lt;/span&gt; ? htable_lookup+0x1c4/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d225db&amp;gt;&amp;#93;&lt;/span&gt; lu_object_find_at+0xab/0x350 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81065df0&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d22896&amp;gt;&amp;#93;&lt;/span&gt; lu_object_find+0x16/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa14e2ea6&amp;gt;&amp;#93;&lt;/span&gt; mdt_object_find+0x56/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa14e4d2b&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_policy+0x75b/0xca0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f8e899&amp;gt;&amp;#93;&lt;/span&gt; ldlm_lock_enqueue+0x369/0x930 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0fb7d8f&amp;gt;&amp;#93;&lt;/span&gt; ldlm_handle_enqueue0+0x4ef/0x10b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa1039f02&amp;gt;&amp;#93;&lt;/span&gt; tgt_enqueue+0x62/0x1d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa103a2ac&amp;gt;&amp;#93;&lt;/span&gt; tgt_request_handle+0x23c/0xac0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0fe9d1a&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xd1a/0x1980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0fe9000&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aee6&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ae50&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;

&lt;p&gt;And are repeated up until when it LBUGged (always the same task).&lt;/p&gt;

&lt;p&gt;The stuck thread message from the client is coming on task exit, so it&apos;s already been stuck for some amount of time.  The first stuck thread message on the MDS (Stuck for 600 seconds) comes 9 minutes or so after the client reports a stuck thread.  So the time frames are pretty good.&lt;/p&gt;

&lt;p&gt;Without digging through data structures on the MDS I can&apos;t be sure, but it seems likely the stuck thread on the MDS is the cause of the problem on the client.&lt;/p&gt;</comment>
                            <comment id="87182" author="jlevi" created="Fri, 20 Jun 2014 17:09:27 +0000"  >&lt;p&gt;Di,&lt;br/&gt;
Can you please have a look at this one and complete an initial assessment to determine if this should be a blocker for 2.6?&lt;/p&gt;</comment>
                            <comment id="87220" author="di.wang" created="Sat, 21 Jun 2014 00:30:52 +0000"  >&lt;p&gt;Jodi: &lt;/p&gt;

&lt;p&gt;Yes, since it is an LBUG, it could probably be a blocker, or at least a critical one.  But I think I know the reason; I will cook a patch soon.&lt;/p&gt;</comment>
                            <comment id="87225" author="di.wang" created="Sat, 21 Jun 2014 04:29:47 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/10772&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10772&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="87557" author="jlevi" created="Thu, 26 Jun 2014 12:56:21 +0000"  >&lt;p&gt;Patch landed to Master. Please reopen ticket if there is more work needed.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="25170">LU-5204</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwph3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14584</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>