<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:22:18 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8990] Failback LBUG lod_device_free()) ASSERTION( atomic_read(&amp;lu-&gt;ld_ref)</title>
                <link>https://jira.whamcloud.com/browse/LU-8990</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;System completed failover of lola-8 to lola-9 (MDS failover) &lt;br/&gt;
Attempting to failback, trigger assertion. &lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&amp;lt;4&amp;gt;Lustre: Failing over soaked-MDT0000
&amp;lt;3&amp;gt;LustreError: 34433:0:(osp_precreate.c:912:osp_precreate_cleanup_orphans()) soaked-OST0000-osc-MDT0000: cannot cleanup orphans: rc = -5
&amp;lt;3&amp;gt;LustreError: 34433:0:(osp_precreate.c:912:osp_precreate_cleanup_orphans()) Skipped 10 previous similar messages
&amp;lt;6&amp;gt;Lustre: soaked-MDT0000: Not available &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; connect from 192.168.1.126@o2ib100 (stopping)
&amp;lt;6&amp;gt;Lustre: Skipped 3 previous similar messages
&amp;lt;3&amp;gt;LustreError: 34664:0:(lod_qos.c:208:lod_statfs_and_check()) soaked-MDT0000-mdtlov: statfs: rc = -108
&amp;lt;3&amp;gt;LustreError: 34664:0:(lod_qos.c:208:lod_statfs_and_check()) Skipped 43 previous similar messages
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:882:ldlm_resource_complain()) mdt-soaked-MDT0000_UUID: namespace resource [0x20002ac2a:0x1fbe:0x0].0xe8519c29 (ffff8803f859fec0) refcount nonzero (1) after lock cleanup; forcing cleanup.
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:882:ldlm_resource_complain()) Skipped 5 previous similar messages
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1463:ldlm_resource_dump()) --- Resource: [0x20002ac2a:0x1fbe:0x0].0xe8519c29 (ffff8803f859fec0) refcount = 2
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1466:ldlm_resource_dump()) Granted locks (in reverse order):
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1469:ldlm_resource_dump()) ### ### ns: mdt-soaked-MDT0000_UUID lock: ffff8804001e4b40/0xda913c453295768d lrc: 2/0,1 mode: PW/PW res: [0x20002ac2a:0x1fbe:0x0].0xe8519c29 bits 0x2 rrc: 2 type: IBT flags: 0x40316400000000 nid: local remote: 0x0 expref: -99 pid: 34549 timeout: 0 lvb_type: 0
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1469:ldlm_resource_dump()) Skipped 4 previous similar messages
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1463:ldlm_resource_dump()) --- Resource: [0x20002ac14:0xd2e4:0x0].0x786cbc9c (ffff8807f5e87b00) refcount = 5
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1466:ldlm_resource_dump()) Granted locks (in reverse order):
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1484:ldlm_resource_dump()) Waiting locks:
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1486:ldlm_resource_dump()) ### ### ns: mdt-soaked-MDT0000_UUID lock: ffff880836be2b80/0xda913c4532957639 lrc: 2/0,1 mode: --/PW res: [0x20002ac14:0xd2e4:0x0].0x786cbc9c bits 0x2 rrc: 5 type: IBT flags: 0x40316400000020 nid: local remote: 0x0 expref: -99 pid: 34463 timeout: 0 lvb_type: 0
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1463:ldlm_resource_dump()) --- Resource: [0x20002ac2a:0x1fbe:0x0].0xc7d60cfe (ffff8807f5e87a40) refcount = 2
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1466:ldlm_resource_dump()) Granted locks (in reverse order):
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1463:ldlm_resource_dump()) --- Resource: [0x20002ac2a:0x1fbe:0x0].0x0 (ffff88041165ac00) refcount = 9
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1466:ldlm_resource_dump()) Granted locks (in reverse order):
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1463:ldlm_resource_dump()) --- Resource: [0x20002ac2a:0x1fbe:0x0].0x78a9f709 (ffff880401cd9b40) refcount = 2
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1466:ldlm_resource_dump()) Granted locks (in reverse order):
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1463:ldlm_resource_dump()) --- Resource: [0x20002ac2a:0x1fbe:0x0].0xd5a94b89 (ffff8803f859fbc0) refcount = 2
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1466:ldlm_resource_dump()) Granted locks (in reverse order):
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1463:ldlm_resource_dump()) --- Resource: [0x20002ac14:0xd2e4:0x0].0x0 (ffff8807fbdccf00) refcount = 9
&amp;lt;3&amp;gt;LustreError: 34700:0:(ldlm_resource.c:1466:ldlm_resource_dump()) Granted locks (in reverse order):
&amp;lt;3&amp;gt;LustreError: 9135:0:(client.c:1166:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff8803f0bb0c80 x1555552176176736/t0(0) o13-&amp;gt;soaked-OST000f-osc-MDT0000@192.168.1.105@o2ib10:7/4 lens 224/368 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
&amp;lt;3&amp;gt;LustreError: 9135:0:(client.c:1166:ptlrpc_import_delay_req()) Skipped 6 previous similar messages
&amp;lt;6&amp;gt;Lustre: soaked-MDT0000: Not available &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; connect from 192.168.1.107@o2ib10 (stopping)
&amp;lt;6&amp;gt;Lustre: Skipped 7 previous similar messages
&amp;lt;6&amp;gt;Lustre: soaked-MDT0000: Not available &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; connect from 192.168.1.117@o2ib100 (stopping)
&amp;lt;6&amp;gt;Lustre: Skipped 15 previous similar messages
&amp;lt;6&amp;gt;Lustre: soaked-MDT0000: Not available &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; connect from 192.168.1.130@o2ib100 (stopping)
&amp;lt;6&amp;gt;Lustre: Skipped 6 previous similar messages
&amp;lt;3&amp;gt;LustreError: 0-0: Forced cleanup waiting &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; mdt-soaked-MDT0000_UUID namespace with 6 resources in use, (rc=-110)
&amp;lt;3&amp;gt;LustreError: 0-0: Forced cleanup waiting &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; mdt-soaked-MDT0000_UUID namespace with 6 resources in use, (rc=-110)
&amp;lt;6&amp;gt;Lustre: soaked-MDT0000: Not available &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; connect from 192.168.1.133@o2ib100 (stopping)
&amp;lt;6&amp;gt;Lustre: Skipped 9 previous similar messages
&amp;lt;6&amp;gt;Lustre: soaked-MDT0000: Not available &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; connect from 192.168.1.106@o2ib10 (stopping)
&amp;lt;6&amp;gt;Lustre: Skipped 23 previous similar messages
&amp;lt;3&amp;gt;LustreError: 0-0: Forced cleanup waiting &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; mdt-soaked-MDT0000_UUID namespace with 6 resources in use, (rc=-110)
&amp;lt;3&amp;gt;LustreError: 34661:0:(lod_qos.c:208:lod_statfs_and_check()) soaked-MDT0000-mdtlov: statfs: rc = -108
&amp;lt;3&amp;gt;LustreError: 34661:0:(lod_qos.c:208:lod_statfs_and_check()) Skipped 201 previous similar messages
&amp;lt;0&amp;gt;LustreError: 34700:0:(mdt_handler.c:4565:mdt_fini()) ASSERTION( atomic_read(&amp;amp;d-&amp;gt;ld_ref) == 0 ) failed:
&amp;lt;0&amp;gt;LustreError: 9095:0:(lod_dev.c:1654:lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref) == 0 ) failed: lu is ffff8803ffd54000
&amp;lt;0&amp;gt;LustreError: 9095:0:(lod_dev.c:1654:lod_device_free()) LBUG
&amp;lt;4&amp;gt;Pid: 9095, comm: obd_zombid
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;A crash dump is available on lola-9 -/var/crash/127.0.0.1-2017-01-05-05:48:01&lt;/p&gt;</description>
                <environment>Soak cluster lustre: 2.9.51_4_g39af202 - tip of master on 12/30</environment>
        <key id="42780">LU-8990</key>
            <summary>Failback LBUG lod_device_free()) ASSERTION( atomic_read(&amp;lu-&gt;ld_ref)</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="cliffw">Cliff White</reporter>
                        <labels>
                            <label>soak</label>
                    </labels>
                <created>Thu, 5 Jan 2017 16:48:23 +0000</created>
                <updated>Sat, 3 Nov 2018 01:54:39 +0000</updated>
                            <resolved>Tue, 27 Feb 2018 04:27:17 +0000</resolved>
                                    <version>Lustre 2.10.0</version>
                    <version>Lustre 2.11.0</version>
                    <version>Lustre 2.10.2</version>
                    <version>Lustre 2.10.3</version>
                                    <fixVersion>Lustre 2.11.0</fixVersion>
                    <fixVersion>Lustre 2.10.4</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="179746" author="pjones" created="Thu, 5 Jan 2017 18:07:52 +0000"  >&lt;p&gt;Lai&lt;/p&gt;

&lt;p&gt;Does this seem familiar to an issue you are already working on?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="180027" author="laisiyao" created="Mon, 9 Jan 2017 10:40:05 +0000"  >&lt;p&gt;This looks to be caused by leaked lock, but &apos;crash struct&apos; shows all the listed locks and resources have been released (the content is 5a5a..), while &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7546&quot; title=&quot;conf-sanity conf-sanity: lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref) == 0 ) &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7546&quot;&gt;&lt;del&gt;LU-7546&lt;/del&gt;&lt;/a&gt; has already been fixed, this may be a different issue.&lt;/p&gt;

&lt;p&gt;More notes from logs: the listed locks still have users (mostly writers), and they are not converted into COS lock yet, so when the message is printed, they should still be in mdt handing, or were forgotten to unlock like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7546&quot; title=&quot;conf-sanity conf-sanity: lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref) == 0 ) &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7546&quot;&gt;&lt;del&gt;LU-7546&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="195717" author="pjones" created="Fri, 12 May 2017 18:12:44 +0000"  >&lt;p&gt;Cliff&lt;/p&gt;

&lt;p&gt;Is this still a live issue on soak or has it been replaced by new issues on more recent runs?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="195718" author="cliffw" created="Fri, 12 May 2017 18:14:34 +0000"  >&lt;p&gt;I have not seen it recently&lt;/p&gt;</comment>
                            <comment id="195719" author="pjones" created="Fri, 12 May 2017 18:19:04 +0000"  >&lt;p&gt;ok- thanks Cliff. So I will remove the Fix Version for now. It&apos;s still possible this is just a really rare edge case and it might come up again but it seems like our time would be better spent on other issues for 2.10 for the time being&lt;/p&gt;</comment>
                            <comment id="201627" author="cliffw" created="Tue, 11 Jul 2017 02:19:09 +0000"  >&lt;p&gt;Hit this again immediately on lustre b2_10 build 3&lt;/p&gt;
{code}
[ 7028.656544] Lustre: Failing over soaked-MDT0000
[ 7028.693228] Lustre: soaked-MDT0000: Not available for connect from 192.168.1.120@o2ib (stopping)
[ 7028.704412] Lustre: Skipped 2 previous similar messages
[ 7028.723793] LustreError: 4767:0:(lod_qos.c:208:lod_statfs_and_check()) soaked-MDT0000-mdtlov: statfs: rc = -108
[ 7028.730576] LustreError: 12528:0:(ldlm_resource.c:1094:ldlm_resource_complain()) mdt-soaked-MDT0000_UUID: namespace resource [0x200000bf1:0x357e:0x0].0x5dedd0e5 (ffff8807f3886c00) refcount nonzero (2) after lock cleanup; forcing cleanup.
[ 7028.730579] LustreError: 12528:0:(ldlm_resource.c:1676:ldlm_resource_dump()) --- Resource: [0x200000bf1:0x357e:0x0].0x5dedd0e5 (ffff8807f3886c00) refcount = 3
[ 7028.730580] LustreError: 12528:0:(ldlm_resource.c:1679:ldlm_resource_dump()) Granted locks (in reverse order):
[ 7028.730587] LustreError: 12528:0:(ldlm_resource.c:1682:ldlm_resource_dump()) ### ### ns: mdt-soaked-MDT0000_UUID lock: ffff880774c4d200/0xaff450bee03d5e2e lrc: 2/0,1 mode: PW/PW res: [0x200000bf1:0x357e:0x0].0x5dedd0e5 bits 0x2 rrc: 4 type: IBT flags: 0x40210400000020 nid: local remote: 0x0 expref: -99 pid: 4767 timeout: 0 lvb_type: 0
[ 7028.730589] LustreError: 12528:0:(ldlm_resource.c:1697:ldlm_resource_dump()) Waiting locks:
[ 7028.730593] LustreError: 12528:0:(ldlm_resource.c:1699:ldlm_resource_dump()) ### ### ns: mdt-soaked-MDT0000_UUID lock: ffff8804072cb800/0xaff450bee03d5e35 lrc: 3/0,1 mode: --/PW res: [0x200000bf1:0x357e:0x0].0x5dedd0e5 bits 0x2 rrc: 4 type: IBT flags: 0x40210000000000 nid: local remote: 0x0 expref: -99 pid: 4652 timeout: 0 lvb_type: 0
[ 7028.732498] LustreError: 12528:0:(ldlm_resource.c:1676:ldlm_resource_dump()) --- Resource: [0x200000bf1:0x357e:0x0].0x0 (ffff880777c28780) refcount = 5
[ 7028.732499] LustreError: 12528:0:(ldlm_resource.c:1697:ldlm_resource_dump()) Waiting locks:
[ 7028.850045] LustreError: 4222:0:(client.c:1166:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff88077a390000 x1572582403710608/t0(0) o13-&amp;gt;soaked-OST000e-osc-MDT0000@192.168.1.104@o2ib:7/4 lens 224/368 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
[ 7030.100044] Lustre: soaked-MDT0000: Not available for connect from 192.168.1.107@o2ib (stopping)
[ 7030.100045] Lustre: soaked-MDT0000: Not available for connect from 192.168.1.107@o2ib (stopping)
[ 7030.100047] Lustre: Skipped 6 previous similar messages
[ 7030.134373] Lustre: Skipped 2 previous similar messages
[ 7031.012257] LustreError: 4154:0:(lod_dev.c:1672:lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref) == 0 ) failed: lu is ffff8807878f2000
{code}
&lt;p&gt;Looks like the node is wedged&lt;/p&gt;</comment>
                            <comment id="216211" author="cliffw" created="Wed, 13 Dec 2017 22:03:15 +0000"  >&lt;p&gt;Hit this again. version=2.10.2_RC1_4_g2022d41&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[ 2083.015096] Lustre: 2491:0:(service.c:2112:ptlrpc_server_handle_request()) Skipped 1 previous similar message
[ 2083.338528] LustreError: 2313:0:(lod_qos.c:208:lod_statfs_and_check()) Skipped 113 previous similar messages
[ 2089.418303] LustreError: 2159:0:(lod_dev.c:1672:lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref) == 0 ) failed: lu is ffff8803b71b0000
[ 2089.418369] LustreError: 3007:0:(mdt_handler.c:4808:mdt_fini()) ASSERTION( atomic_read(&amp;amp;d-&amp;gt;ld_ref) == 0 ) failed:
[ 2089.418371] LustreError: 3007:0:(mdt_handler.c:4808:mdt_fini()) LBUG
[ 2089.418372] Pid: 3007, comm: umount
[ 2089.418373]
Call Trace:
[ 2089.418397]  [&amp;lt;ffffffffc09717ae&amp;gt;] libcfs_call_trace+0x4e/0x60 [libcfs]
[ 2089.418405]  [&amp;lt;ffffffffc097183c&amp;gt;] lbug_with_loc+0x4c/0xb0 [libcfs]
[ 2089.418431]  [&amp;lt;ffffffffc16cf832&amp;gt;] mdt_device_fini+0x8e2/0x920 [mdt]
[ 2089.418470]  [&amp;lt;ffffffffc0a8c921&amp;gt;] class_cleanup+0x971/0xcd0 [obdclass]
[ 2089.418497]  [&amp;lt;ffffffffc0a8ecbd&amp;gt;] class_process_config+0x19cd/0x23b0 [obdclass]
[ 2089.418508]  [&amp;lt;ffffffffc097cbc7&amp;gt;] ? libcfs_debug_msg+0x57/0x80 [libcfs]
[ 2089.418534]  [&amp;lt;ffffffffc0a8f866&amp;gt;] class_manual_cleanup+0x1c6/0x710 [obdclass]
[ 2089.418564]  [&amp;lt;ffffffffc0abdffe&amp;gt;] server_put_super+0x8de/0xcd0 [obdclass]
[ 2089.418571]  [&amp;lt;ffffffff81203692&amp;gt;] generic_shutdown_super+0x72/0x100
[ 2089.418574]  [&amp;lt;ffffffff81203a62&amp;gt;] kill_anon_super+0x12/0x20
[ 2089.418600]  [&amp;lt;ffffffffc0a92162&amp;gt;] lustre_kill_super+0x32/0x50 [obdclass]
[ 2089.418602]  [&amp;lt;ffffffff81203e19&amp;gt;] deactivate_locked_super+0x49/0x60
[ 2089.418604]  [&amp;lt;ffffffff81204586&amp;gt;] deactivate_super+0x46/0x60
[ 2089.418610]  [&amp;lt;ffffffff812217cf&amp;gt;] cleanup_mnt+0x3f/0x80
[ 2089.418612]  [&amp;lt;ffffffff81221862&amp;gt;] __cleanup_mnt+0x12/0x20
[ 2089.418618]  [&amp;lt;ffffffff810ad275&amp;gt;] task_work_run+0xc5/0xf0
[ 2089.418622]  [&amp;lt;ffffffff8102ab62&amp;gt;] do_notify_resume+0x92/0xb0
[ 2089.418627]  [&amp;lt;ffffffff816b533d&amp;gt;] int_signal+0x12/0x17
[ 2089.418628]
[ 2089.418629] Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Crash dump available on spirit.&lt;/p&gt;</comment>
                            <comment id="218001" author="cliffw" created="Thu, 11 Jan 2018 16:34:45 +0000"  >&lt;p&gt;Hit this again on 2.10.3-RC1 - It may be rare outside but we seem to hit it at least once every test cycle. This time it occurred when failing back from MDS failover.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Jan 11 00:20:11 soak-9 kernel: LustreError: 22047:0:(lod_qos.c:208:lod_statfs_and_check()) soaked-MDT0000-mdtlov: statfs: rc = -108
Jan 11 00:20:11 soak-9 kernel: LustreError: 22047:0:(lod_qos.c:208:lod_statfs_and_check()) Skipped 931 previous similar messages
Jan 11 00:20:11 soak-9 kernel: Lustre: 22047:0:(service.c:2112:ptlrpc_server_handle_request()) @@@ Request took longer than estimated (42:58s); client may timeout.  req@ffff88081c680f00 x1589236901262384/t0(0) o101-&amp;gt;0940640e-d454-688a-a4a6-941fc69c399c@192.168.1.117@o2ib:673/0 lens 1728/544 e 0 to 0 dl 1515629953 ref 1 fl Complete:/0/0 rc -19/-19
Jan 11 00:20:11 soak-9 kernel: Lustre: 22047:0:(service.c:2112:ptlrpc_server_handle_request()) Skipped 1 previous similar message
Jan 11 00:20:13 soak-9 kernel: LustreError: 22287:0:(ldlm_lockd.c:1415:ldlm_handle_enqueue0()) ### lock on destroyed export ffff8803ca2ca400 ns: mdt-soaked-MDT0000_UUID lock: ffff8803ca63c800/0xf21e83d58af83b34 lrc: 3/0,0 mode: CR/CR res: [0x200025e04:0x1fd72:0x0].0x0 bits 0x9 rrc: 2 type: IBT flags: 0x50200000000000 nid: 192.168.1.117@o2ib remote: 0xd319047699a98ef6 expref: 3 pid: 22287 timeout: 0 lvb_type: 0
Jan 11 00:20:13 soak-9 kernel: LustreError: 22044:0:(lod_qos.c:208:lod_statfs_and_check()) soaked-MDT0000-mdtlov: statfs: rc = -108
Jan 11 00:20:13 soak-9 kernel: LustreError: 22044:0:(lod_qos.c:208:lod_statfs_and_check()) Skipped 139 previous similar messages
Jan 11 00:20:13 soak-9 kernel: Lustre: 22364:0:(service.c:2112:ptlrpc_server_handle_request()) @@@ Request took longer than estimated (42:59s); client may timeout.  req@ffff8803cca4b600 x1589235586212928/t0(0) o101-&amp;gt;55640ac0-502d-db6b-5093-b453b7dbb0cb@192.168.1.118@o2ib:673/0 lens 1728/544 e 0 to 0 dl 1515629953 ref 1 fl Complete:/0/0 rc -19/-19
Jan 11 00:20:13 soak-9 kernel: LustreError: 22287:0:(ldlm_lockd.c:1415:ldlm_handle_enqueue0()) Skipped 1 previous similar message
Jan 11 00:20:13 soak-9 kernel: LustreError: 0-0: Forced cleanup waiting &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; mdt-soaked-MDT0000_UUID namespace with 1 resources in use, (rc=-110)
Jan 11 00:20:17 soak-9 kernel: Lustre: soaked-MDT0000: Not available &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; connect from 192.168.1.119@o2ib (stopping)
Jan 11 00:20:17 soak-9 kernel: Lustre: Skipped 14 previous similar messages
Jan 11 00:20:19 soak-9 kernel: LustreError: 22401:0:(lod_dev.c:1672:lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref) == 0 ) failed: lu is ffff8803bdb8e000
Jan 11 00:20:19 soak-9 kernel: LustreError: 22401:0:(lod_dev.c:1672:lod_device_free()) LBUG
Jan 11 00:20:19 soak-9 kernel: Pid: 22401, comm: umount
Jan 11 00:20:19 soak-9 kernel: #012Call Trace:
Jan 11 00:20:19 soak-9 kernel: [&amp;lt;ffffffffc0de47ae&amp;gt;] libcfs_call_trace+0x4e/0x60 [libcfs]
Jan 11 00:20:19 soak-9 kernel: [&amp;lt;ffffffffc0de483c&amp;gt;] lbug_with_loc+0x4c/0xb0 [libcfs]
Jan 11 00:20:19 soak-9 kernel: [&amp;lt;ffffffffc1657f76&amp;gt;] lod_device_free+0x296/0x2a0 [lod]
Jan 11 00:20:19 soak-9 kernel: [&amp;lt;ffffffffc0ee373e&amp;gt;] class_free_dev+0x54e/0x7f0 [obdclass]
Jan 11 00:20:19 soak-9 kernel: [&amp;lt;ffffffffc0ee3c00&amp;gt;] class_export_put+0x220/0x2f0 [obdclass]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc0ee56f5&amp;gt;] class_unlink_export+0x135/0x170 [obdclass]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc0efad70&amp;gt;] class_decref+0x80/0x160 [obdclass]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc0efb1d3&amp;gt;] class_detach+0x1b3/0x2e0 [obdclass]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc0f01eb8&amp;gt;] class_process_config+0x1a28/0x23f0 [obdclass]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffff810c93f5&amp;gt;] ? sched_clock_cpu+0x85/0xc0
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc0defbc7&amp;gt;] ? libcfs_debug_msg+0x57/0x80 [libcfs]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc0f02a60&amp;gt;] class_manual_cleanup+0x1e0/0x710 [obdclass]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc16579c3&amp;gt;] lod_obd_disconnect+0x93/0x1c0 [lod]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc16ccbc9&amp;gt;] mdd_process_config+0x289/0x600 [mdd]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc152a4cc&amp;gt;] mdt_stack_fini+0x2bc/0xd40 [mdt]
Jan 11 00:20:20 soak-9 kernel: [&amp;lt;ffffffffc152b423&amp;gt;] mdt_device_fini+0x4d3/0x920 [mdt]
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffffc0effac1&amp;gt;] class_cleanup+0x971/0xcd0 [obdclass]
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffffc0f01e22&amp;gt;] class_process_config+0x1992/0x23f0 [obdclass]
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffffc0defbc7&amp;gt;] ? libcfs_debug_msg+0x57/0x80 [libcfs]
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffffc0f02a46&amp;gt;] class_manual_cleanup+0x1c6/0x710 [obdclass]
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffffc0f30f8e&amp;gt;] server_put_super+0x8de/0xcd0 [obdclass]
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffff812054d2&amp;gt;] generic_shutdown_super+0x72/0x100
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffff812058a2&amp;gt;] kill_anon_super+0x12/0x20
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffffc0f05342&amp;gt;] lustre_kill_super+0x32/0x50 [obdclass]
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffff81205c59&amp;gt;] deactivate_locked_super+0x49/0x60
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffff812063c6&amp;gt;] deactivate_super+0x46/0x60
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffff8122376f&amp;gt;] cleanup_mnt+0x3f/0x80 
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffff81223802&amp;gt;] __cleanup_mnt+0x12/0x20  
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffff810aee05&amp;gt;] task_work_run+0xc5/0xf0
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffff8102ab52&amp;gt;] do_notify_resume+0x92/0xb0
Jan 11 00:20:21 soak-9 kernel: [&amp;lt;ffffffff816b8d37&amp;gt;] int_signal+0x12/0x17
Jan 11 00:20:22 soak-9 kernel:
Jan 11 00:20:22 soak-9 kernel: Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="219150" author="cliffw" created="Thu, 25 Jan 2018 16:13:36 +0000"  >&lt;p&gt;Hit this again on 2.10.57 -&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Jan 25 00:15:03 soak-9 kernel: LustreError: 3830:0:(lod_qos.c:208:lod_statfs_and_check()) Skipped 46 previous similar messages
Jan 25 00:15:04 soak-9 kernel: Lustre: 3753:0:(service.c:2124:ptlrpc_server_handle_request()) @@@ Request took longer than estimated (42:64s); client may timeout.  req@ffff8803d2008c00 x1590514452745184/t0(0) o101-&amp;gt;7f42689f-29c1-6234-8d58-8721a9f86699@192.168.1.125@o2ib:449/0 lens 1728/544 e 0 to 0 dl 1516839239 ref 1 fl Complete:/0/0 rc -19/-19
Jan 25 00:15:04 soak-9 kernel: LustreError: 3830:0:(ldlm_lockd.c:1362:ldlm_handle_enqueue0()) ### lock on destroyed export ffff8808177d1c00 ns: mdt-soaked-MDT0000_UUID lock: ffff880816f58b40/0xb5b8772267a2d8f2 lrc: 3/0,0 mode: CR/CR res: [0x20006e1f6:0x16d28:0x0].0x0 bits 0x8/0x0 rrc: 2 type: IBT flags: 0x50200000000000 nid: 192.168.1.117@o2ib remote: 0xb6333a2c30ac9e3b expref: 3 pid: 3830 timeout: 0 lvb_type: 0
Jan 25 00:15:04 soak-9 kernel: Lustre: 3753:0:(service.c:2124:ptlrpc_server_handle_request()) Skipped 1 previous similar message
Jan 25 00:15:07 soak-9 kernel: Lustre: soaked-MDT0000: Not available &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; connect from 192.168.1.134@o2ib (stopping)
Jan 25 00:15:07 soak-9 kernel: Lustre: Skipped 10 previous similar messages
Jan 25 00:15:09 soak-9 kernel: LustreError: 3915:0:(lod_dev.c:1687:lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref) == 0 ) failed: lu is ffff88082668e000
Jan 25 00:15:09 soak-9 kernel: LustreError: 3915:0:(lod_dev.c:1687:lod_device_free()) LBUG
Jan 25 00:15:09 soak-9 kernel: Pid: 3915, comm: umount
Jan 25 00:15:09 soak-9 kernel: #012Call Trace:
Jan 25 00:15:09 soak-9 kernel: [&amp;lt;ffffffffc096f7ae&amp;gt;] libcfs_call_trace+0x4e/0x60 [libcfs]
Jan 25 00:15:09 soak-9 kernel: [&amp;lt;ffffffffc096f83c&amp;gt;] lbug_with_loc+0x4c/0xb0 [libcfs]
Jan 25 00:15:09 soak-9 kernel: [&amp;lt;ffffffffc1865f76&amp;gt;] lod_device_free+0x296/0x2a0 [lod] 
Jan 25 00:15:09 soak-9 kernel: [&amp;lt;ffffffffc0a7ec4c&amp;gt;] class_free_dev+0x4dc/0x730 [obdclass]
Jan 25 00:15:09 soak-9 kernel: [&amp;lt;ffffffffc0a7f0c0&amp;gt;] class_export_put+0x220/0x2f0 [obdclass]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc0a80bd5&amp;gt;] class_unlink_export+0x135/0x170 [obdclass]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc0a96570&amp;gt;] class_decref+0x80/0x160 [obdclass]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc0a969d3&amp;gt;] class_detach+0x1b3/0x2e0 [obdclass]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc0a9d5e9&amp;gt;] class_process_config+0x19d9/0x28a0 [obdclass]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc097ad47&amp;gt;] ? libcfs_debug_msg+0x57/0x80 [libcfs]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc0a9e690&amp;gt;] class_manual_cleanup+0x1e0/0x710 [obdclass]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc18659c3&amp;gt;] lod_obd_disconnect+0x93/0x1c0 [lod]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc18e3d1e&amp;gt;] mdd_process_config+0x3de/0x630 [mdd]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc177827c&amp;gt;] mdt_stack_fini+0x2bc/0xd40 [mdt]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc1779083&amp;gt;] mdt_device_fini+0x383/0x970 [mdt] 
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc0a9b21c&amp;gt;] class_cleanup+0x8cc/0xc40 [obdclass]
Jan 25 00:15:10 soak-9 kernel: [&amp;lt;ffffffffc0a9c23c&amp;gt;] class_process_config+0x62c/0x28a0 [obdclass]
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffffc097ad47&amp;gt;] ? libcfs_debug_msg+0x57/0x80 [libcfs]
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffffc0a9e676&amp;gt;] class_manual_cleanup+0x1c6/0x710 [obdclass]
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffffc0ace25e&amp;gt;] server_put_super+0x8de/0xcd0 [obdclass]
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffff812054d2&amp;gt;] generic_shutdown_super+0x72/0x100
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffff812058a2&amp;gt;] kill_anon_super+0x12/0x20
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffffc0aa10c2&amp;gt;] lustre_kill_super+0x32/0x50 [obdclass]
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffff81205c59&amp;gt;] deactivate_locked_super+0x49/0x60 
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffff812063c6&amp;gt;] deactivate_super+0x46/0x60
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffff8122376f&amp;gt;] cleanup_mnt+0x3f/0x80 
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffff81223802&amp;gt;] __cleanup_mnt+0x12/0x20
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffff810aee05&amp;gt;] task_work_run+0xc5/0xf0
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffff8102ab52&amp;gt;] do_notify_resume+0x92/0xb0
Jan 25 00:15:11 soak-9 kernel: [&amp;lt;ffffffff816b8d37&amp;gt;] int_signal+0x12/0x17
Jan 25 00:15:12 soak-9 kernel:
Jan 25 00:15:12 soak-9 kernel: Kernel panic - not syncing: LBUG
Jan 25 00:19:04 soak-9 systemd: Starting Stop Read-Ahead Data Collection...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Crash dumps are available on spirit&lt;/p&gt;</comment>
                            <comment id="219660" author="cliffw" created="Thu, 1 Feb 2018 16:07:21 +0000"  >&lt;p&gt;Hit this again on version=2.10.57_57_g98ddc99, basically blocks most soak/failover tests&lt;/p&gt;</comment>
                            <comment id="219842" author="gerrit" created="Fri, 2 Feb 2018 15:05:09 +0000"  >&lt;p&gt;Lai Siyao (lai.siyao@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/31143&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/31143&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8990&quot; title=&quot;Failback LBUG lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8990&quot;&gt;&lt;del&gt;LU-8990&lt;/del&gt;&lt;/a&gt; debug: dump objects in lod_device_free&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: cd8a85a3100c1b3aaea6e61a86b7057dade33341&lt;/p&gt;</comment>
                            <comment id="219843" author="laisiyao" created="Fri, 2 Feb 2018 15:06:25 +0000"  >&lt;p&gt;Hi Cliff, I uploaded a debug patch, will you test soak/failover with it?&lt;/p&gt;</comment>
                            <comment id="220328" author="cliffw" created="Wed, 7 Feb 2018 16:52:33 +0000"  >&lt;p&gt;Ask Minh to trigger an MNLX build and I can test that. Is your patch based on latest master? &lt;/p&gt;</comment>
                            <comment id="220381" author="laisiyao" created="Thu, 8 Feb 2018 02:16:41 +0000"  >&lt;p&gt;Yes, it is.&lt;/p&gt;</comment>
                            <comment id="220439" author="cliffw" created="Thu, 8 Feb 2018 17:30:50 +0000"  >&lt;p&gt;Hit the problem with your patch, relevant syslog attached, core dumped, vmcore-dmesg.txt attached, full data is available on Spirit: crash dump is at: /scratch/dumps/soak-9.spirit.hpdd.intel018-02-08-08\:03\:14/&lt;/p&gt;</comment>
                            <comment id="220520" author="laisiyao" created="Fri, 9 Feb 2018 02:45:02 +0000"  >&lt;p&gt;Thanks Cliff, it&apos;s quite helpful.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;45593.537511&amp;#93;&lt;/span&gt; LustreError: 8499:0:(lod_dev.c:1688:lod_device_free()) header@ffff8807dc04ee80[0x0, 1, &lt;span class=&quot;error&quot;&gt;&amp;#91;0x200000007:0x1:0x0&amp;#93;&lt;/span&gt; hash exist]&lt;/p&gt;
[45593.591495] LustreError: 8499:0:(lod_dev.c:1688:lod_device_free()) ....mdt@ffff8807dc04eed0mdt-object@ffff8807dc04ee80( , writecount=0)

[45593.645796] LustreError: 8499:0:(lod_dev.c:1688:lod_device_free()) ....mdd@ffff8803cadf5820mdd-object@ffff8803cadf5820(open_count=0, valid=0, cltime=0ns, flags=0)

&lt;p&gt;The message shows &apos;root&apos; is not released at lod_device_free(), I&apos;ll look into related code.&lt;/p&gt;</comment>
                            <comment id="220571" author="laisiyao" created="Fri, 9 Feb 2018 14:46:41 +0000"  >&lt;p&gt;I still need to access the crash dump to verify some details, but I don&apos;t have account on Spirit yet, and I just created a ticket for it: &lt;a href=&quot;https://jira.hpdd.intel.com/browse/DCO-7884&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/DCO-7884&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="220703" author="laisiyao" created="Sun, 11 Feb 2018 07:24:25 +0000"  >&lt;p&gt;The crash dump shows lod-&amp;gt;lod_md_root is not NULL at lod_device_free(), which means lod_md_root is released too early (in precleanup), and some request re-initialized it after that, I&apos;ll move it to real cleanup time.&lt;/p&gt;</comment>
                            <comment id="220704" author="laisiyao" created="Sun, 11 Feb 2018 08:08:48 +0000"  >&lt;p&gt;Hi Cliff, I just updated the patch, could you do soak test again?&lt;/p&gt;</comment>
                            <comment id="220815" author="cliffw" created="Tue, 13 Feb 2018 00:23:48 +0000"  >&lt;p&gt;Not seeing any hard faults yet, but many watchdogs/hanging threads. &lt;br/&gt;
Mostly from recovery after OSS failover. &lt;br/&gt;
example&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Feb 12 22:24:35 soak-8 kernel: LNet: Service thread pid 2499 was inactive &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; 200.49s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; debugging purposes:
Feb 12 22:24:35 soak-8 kernel: Pid: 2499, comm: mdt01_003
Feb 12 22:24:35 soak-8 kernel: #012Call Trace:
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffff81033519&amp;gt;] ? sched_clock+0x9/0x10
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffff816ab6b9&amp;gt;] schedule+0x29/0x70
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffff816a9004&amp;gt;] schedule_timeout+0x174/0x2c0
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffff8109a6c0&amp;gt;] ? process_timeout+0x0/0x10
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc0e21eb1&amp;gt;] ? cfs_block_sigsinv+0x71/0xa0 [libcfs]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc17ae760&amp;gt;] osp_precreate_reserve+0x2e0/0x810 [osp]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffff810c6440&amp;gt;] ? default_wake_function+0x0/0x20
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc17a3c53&amp;gt;] osp_declare_create+0x193/0x590 [osp]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc0f404a9&amp;gt;] ? lprocfs_counter_add+0xf9/0x160 [obdclass]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc16f47dc&amp;gt;] lod_sub_declare_create+0xdc/0x210 [lod]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc16eda4e&amp;gt;] lod_qos_declare_object_on+0xbe/0x3a0 [lod]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc16ee9ca&amp;gt;] lod_alloc_rr.constprop.18+0x70a/0x1010 [lod]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc16f317d&amp;gt;] lod_qos_prep_create+0xced/0x1820 [lod]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc16f000e&amp;gt;] ? lod_alloc_qos.constprop.17+0xd3e/0x1590 [lod]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc16f420d&amp;gt;] lod_prepare_create+0x25d/0x360 [lod]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc16e5f7e&amp;gt;] lod_declare_striped_create+0x1ee/0x970 [lod]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc16f47dc&amp;gt;] ? lod_sub_declare_create+0xdc/0x210 [lod]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc16ea2b4&amp;gt;] lod_declare_create+0x204/0x590 [lod]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc0f60619&amp;gt;] ? lu_context_refill+0x19/0x50 [obdclass]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc175c3ef&amp;gt;] mdd_declare_create_object_internal+0xdf/0x2f0 [mdd]
Feb 12 22:24:35 soak-8 kernel: [&amp;lt;ffffffffc174cb63&amp;gt;] mdd_declare_create+0x53/0xe30 [mdd]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc1750e89&amp;gt;] mdd_create+0x879/0x1410 [mdd]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc1605106&amp;gt;] mdt_reint_open+0x2206/0x3260 [mdt]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc0f73d2e&amp;gt;] ? upcall_cache_get_entry+0x20e/0x8f0 [obdclass]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc15e8b43&amp;gt;] ? ucred_set_jobid+0x53/0x70 [mdt]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc15f9400&amp;gt;] mdt_reint_rec+0x80/0x210 [mdt]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc15d8f8b&amp;gt;] mdt_reint_internal+0x5fb/0x9c0 [mdt]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc15e5437&amp;gt;] mdt_intent_reint+0x157/0x420 [mdt]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc15dc0b2&amp;gt;] mdt_intent_opc+0x442/0xad0 [mdt]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc113f470&amp;gt;] ? lustre_swab_ldlm_intent+0x0/0x20 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc15e3c63&amp;gt;] mdt_intent_policy+0x1a3/0x360 [mdt]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc10ef202&amp;gt;] ldlm_lock_enqueue+0x382/0x8f0 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc1117753&amp;gt;] ldlm_handle_enqueue0+0x8f3/0x13e0 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc113f4f0&amp;gt;] ? lustre_swab_ldlm_request+0x0/0x30 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc119d202&amp;gt;] tgt_enqueue+0x62/0x210 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc11a5405&amp;gt;] tgt_request_handle+0x925/0x13b0 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc114958e&amp;gt;] ptlrpc_server_handle_request+0x24e/0xab0 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc1146448&amp;gt;] ? ptlrpc_wait_event+0x98/0x340 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffff810c6452&amp;gt;] ? default_wake_function+0x12/0x20
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffff810bc0f8&amp;gt;] ? __wake_up_common+0x58/0x90
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc114cd42&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffffc114c2b0&amp;gt;] ? ptlrpc_main+0x0/0x1e40 [ptlrpc]
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffff810b252f&amp;gt;] kthread+0xcf/0xe0
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffff810b2460&amp;gt;] ? kthread+0x0/0xe0
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffff816b8798&amp;gt;] ret_from_fork+0x58/0x90
Feb 12 22:24:36 soak-8 kernel: [&amp;lt;ffffffff810b2460&amp;gt;] ? kthread+0x0/0xe0
Feb 12 22:24:36 soak-8 kernel:
Feb 12 22:24:36 soak-8 kernel: LustreError: dumping log to /tmp/lustre-log.1518474276.2499
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Logs are available on soak - /scratch/logs/syslog. &lt;/p&gt;</comment>
                            <comment id="220826" author="laisiyao" created="Tue, 13 Feb 2018 02:09:46 +0000"  >&lt;p&gt;This should be a different issue, I&apos;ll look into it later.&lt;/p&gt;</comment>
                            <comment id="221010" author="cliffw" created="Wed, 14 Feb 2018 18:35:01 +0000"  >&lt;p&gt;Your new patch has been running several days, I am not seeing any output, or hard failures, I have grep&apos;d for &apos;lod_device&apos; in logs, but seeing nothing, any other strings I should search for to get you the output you need? Should I perhaps force a crash dump? If you have login to the system now, you can also force a dump if you need. &lt;/p&gt;</comment>
                            <comment id="221740" author="gerrit" created="Tue, 27 Feb 2018 03:42:09 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/31143/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/31143/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8990&quot; title=&quot;Failback LBUG lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8990&quot;&gt;&lt;del&gt;LU-8990&lt;/del&gt;&lt;/a&gt; lod: put root at cleanup&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 94fc345399b3cd94a96aa4b3f607f2dc9d669a98&lt;/p&gt;</comment>
                            <comment id="221771" author="pjones" created="Tue, 27 Feb 2018 04:27:17 +0000"  >&lt;p&gt;Landed for 2.11&lt;/p&gt;</comment>
                            <comment id="221812" author="gerrit" created="Tue, 27 Feb 2018 15:55:01 +0000"  >&lt;p&gt;Minh Diep (minh.diep@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/31431&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/31431&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8990&quot; title=&quot;Failback LBUG lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8990&quot;&gt;&lt;del&gt;LU-8990&lt;/del&gt;&lt;/a&gt; lod: put root at cleanup&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 348a1b52d538b9f26f213d766bef1f359f651e42&lt;/p&gt;</comment>
                            <comment id="225238" author="gerrit" created="Thu, 5 Apr 2018 19:56:11 +0000"  >&lt;p&gt;John L. Hammond (john.hammond@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/31431/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/31431/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8990&quot; title=&quot;Failback LBUG lod_device_free()) ASSERTION( atomic_read(&amp;amp;lu-&amp;gt;ld_ref)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8990&quot;&gt;&lt;del&gt;LU-8990&lt;/del&gt;&lt;/a&gt; lod: put root at cleanup&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 34289a7be2e6ba42c6091ccd8835bd8f3eca9385&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="50849">LU-10677</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="51707">LU-10887</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="29514" name="lu-8990.txt" size="20537" author="cliffw" created="Thu, 8 Feb 2018 17:29:48 +0000"/>
                            <attachment id="29515" name="vmcore-dmesg.txt" size="181573" author="cliffw" created="Thu, 8 Feb 2018 17:29:48 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzyzxr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>