<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:49:45 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5242] Test hang sanity test_132, test_133: umount ost</title>
                <link>https://jira.whamcloud.com/browse/LU-5242</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Nathaniel Clark &amp;lt;nathaniel.l.clark@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://maloo.whamcloud.com/test_sets/e5783778-f887-11e3-b13a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://maloo.whamcloud.com/test_sets/e5783778-f887-11e3-b13a-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_132 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: sanity 132&lt;/p&gt;</description>
                <environment></environment>
        <key id="25269">LU-5242</key>
            <summary>Test hang sanity test_132, test_133: umount ost</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="isaac">Isaac Huang</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>prz</label>
                    </labels>
                <created>Mon, 23 Jun 2014 16:02:54 +0000</created>
                <updated>Thu, 23 Nov 2017 18:55:59 +0000</updated>
                            <resolved>Mon, 18 May 2015 14:20:28 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                    <version>Lustre 2.7.0</version>
                    <version>Lustre 2.5.3</version>
                                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>26</watches>
                                                                            <comments>
                            <comment id="87292" author="green" created="Mon, 23 Jun 2014 17:25:18 +0000"  >&lt;p&gt;MDS2 traces have a very characteristic signature of &quot;waiting for obd_unlinked_exports more than XX&quot; - there a bunch of bug about this, they hit from tiem to time on unmount.&lt;/p&gt;</comment>
                            <comment id="87623" author="adilger" created="Thu, 26 Jun 2014 20:58:42 +0000"  >&lt;p&gt;It looks like there is some kind of problem in cleaning up the OSP device, and this has been causing a significant number of failures recently (about 10% for both review-zfs and review-dne-part-1).&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) header@ffff88005b849740[0x0, 1, [0x1:0x0:0x0] hash exist]{
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....local_storage@ffff88005b849790
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....osd-ldiskfs@ffff88005b849800osd-ldiskfs-object@ffff88005b849800(i:ffff88007b5fe5a8:144/3610117262)[plain]
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) } header@ffff88005b849740
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) header@ffff880046ba3600[0x0, 1, [0x200000003:0x0:0x0] hash exist]{
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....local_storage@ffff880046ba3650
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....osd-ldiskfs@ffff88007954f680osd-ldiskfs-object@ffff88007954f680(i:ffff88007b60c628:110/3610117227)[plain]
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) } header@ffff880046ba3600
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) header@ffff88006abf6800[0x0, 1, [0x200000003:0x2:0x0] hash exist]{
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....local_storage@ffff88006abf6850
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....osd-ldiskfs@ffff88006abf6c80osd-ldiskfs-object@ffff88006abf6c80(i:ffff880057c7c1d0:32771/3610117228)[plain]
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) } header@ffff88006abf6800
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) header@ffff8800572f4680[0x0, 1, [0xa:0x0:0x0] hash exist]{
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....local_storage@ffff8800572f46d0
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....osd-ldiskfs@ffff8800684f6500osd-ldiskfs-object@ffff8800684f6500(i:ffff880060b77110:178/3610117296)[plain]
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) } header@ffff8800572f4680
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) header@ffff88006abf62c0[0x1, 1, [0x200000001:0x1017:0x0] hash exist]{
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....local_storage@ffff88006abf6310
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....osd-ldiskfs@ffff88006abf6d40osd-ldiskfs-object@ffff88006abf6d40(i:ffff880057c706e8:524289/986286545)[plain]
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) } header@ffff88006abf62c0
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) header@ffff880067b38e00[0x0, 1, [0x4c0000402:0x2f:0x0] hash exist]{
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....mdt@ffff880067b38e50mdt-object@ffff880067b38e00(ioepoch=0 flags=0x0, epochcount=0, writecount=0)
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....mdd@ffff880057491cd0mdd-object@ffff880057491cd0(open_count=0, valid=0, cltime=0, flags=0)
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....lod@ffff8800717d0bf0lod-object@ffff8800717d0bf0
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....osp@ffff8800717d3d00osp-object@ffff8800717d3cb0
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) } header@ffff880067b38e00
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) header@ffff88007a048928[0x0, 1, [0x440000bd1:0x2:0x0] hash exist]{
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....mdt@ffff88007a048978mdt-object@ffff88007a048928(ioepoch=0 flags=0x0, epochcount=0, writecount=0)
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....mdd@ffff880061a52e10mdd-object@ffff880061a52e10(open_count=0, valid=0, cltime=0, flags=0)
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....lod@ffff880061a40830lod-object@ffff880061a40830
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....osp@ffff88005e25e1a0osp-object@ffff88005e25e150
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) } header@ffff88007a048928
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) header@ffff880052e8ad08[0x0, 2, [0x440000400:0xd5:0x0] hash exist]{
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....mdt@ffff880052e8ad58mdt-object@ffff880052e8ad08(ioepoch=0 flags=0x0, epochcount=0, writecount=0)
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....mdd@ffff8800717fc3c0mdd-object@ffff8800717fc3c0(open_count=0, valid=0, cltime=0, flags=0)
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....lod@ffff880052e88c68lod-object@ffff880052e88c68
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) ....osp@ffff88005ba3f5b0osp-object@ffff88005ba3f560
03:21:03:LustreError: 7751:0:(osp_dev.c:858:osp_device_free()) } header@ffff880052e8ad08
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It may be that this is a new type of failure caused by a recently landed patch, or it could be the same failure that Oleg describes that has been made worse by some other patch.&lt;/p&gt;</comment>
                            <comment id="87708" author="pjones" created="Fri, 27 Jun 2014 17:16:49 +0000"  >&lt;p&gt;Emoly&lt;/p&gt;

&lt;p&gt;Could you please look into this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="87823" author="adilger" created="Mon, 30 Jun 2014 18:48:27 +0000"  >&lt;p&gt;Emoly,&lt;br/&gt;
could you please look into the recent sanity test_132 failures to see when the above failures started in osp_device_free().  I think this particular failure mode started fairly recently, and is different from older failures reported in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4062&quot; title=&quot;sanity test_132: MGS is waiting for obd_unlinked_exports more than 512 seconds&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4062&quot;&gt;&lt;del&gt;LU-4062&lt;/del&gt;&lt;/a&gt;/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3665&quot; title=&quot;obdfilter-survey test_3a: unmount stuck in obd_exports_barrier()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3665&quot;&gt;&lt;del&gt;LU-3665&lt;/del&gt;&lt;/a&gt;.  If we can track down when the more recent failures started we could see if they relate to a particular patch landing.&lt;/p&gt;</comment>
                            <comment id="87853" author="emoly.liu" created="Tue, 1 Jul 2014 00:02:52 +0000"  >&lt;p&gt;I can hit this &quot;hang&quot; issue on zfs in my local test, but not every time. I will investigate it.&lt;/p&gt;</comment>
                            <comment id="87863" author="emoly.liu" created="Tue, 1 Jul 2014 04:36:17 +0000"  >&lt;p&gt;Yesterday I happened to hit this problem, and its call trace is same to the one shown in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4062&quot; title=&quot;sanity test_132: MGS is waiting for obd_unlinked_exports more than 512 seconds&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4062&quot;&gt;&lt;del&gt;LU-4062&lt;/del&gt;&lt;/a&gt;. But unfortunately, I can&apos;t reproduce it today. &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Jun 30 16:16:28 centos6-x kernel: umount        D 0000000000000000     0 31062  31061 0x00000080
Jun 30 16:16:28 centos6-x kernel: ffff8800176b7aa8 0000000000000086 ffff8800176b7a08 ffff88001f4c3c00
Jun 30 16:16:28 centos6-x kernel: ffffffffa12fbd1d 0000000000000000 ffff88000e00c184 ffffffffa12fbd1d
Jun 30 16:16:28 centos6-x kernel: ffff88001f405098 ffff8800176b7fd8 000000000000fb88 ffff88001f405098
Jun 30 16:16:28 centos6-x kernel: Call Trace:
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffff8150f362&amp;gt;] schedule_timeout+0x192/0x2e0
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffff810811e0&amp;gt;] ? process_timeout+0x0/0x10
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa1281acb&amp;gt;] obd_exports_barrier+0xab/0x180 [obdclass]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa0ba776f&amp;gt;] ofd_device_fini+0x5f/0x260 [ofd]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa12a8633&amp;gt;] class_cleanup+0x573/0xd30 [obdclass]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa12838a6&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa12aa35a&amp;gt;] class_process_config+0x156a/0x1ad0 [obdclass]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa12a26fb&amp;gt;] ? lustre_cfg_new+0x2cb/0x680 [obdclass]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa12aaa39&amp;gt;] class_manual_cleanup+0x179/0x6f0 [obdclass]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa12838a6&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa12e8269&amp;gt;] server_put_super+0x8f9/0xe50 [obdclass]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffff8118363b&amp;gt;] generic_shutdown_super+0x5b/0xe0
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffff81183726&amp;gt;] kill_anon_super+0x16/0x60
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffffa12ac916&amp;gt;] lustre_kill_super+0x36/0x60 [obdclass]
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffff81183ec7&amp;gt;] deactivate_super+0x57/0x80
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffff811a21bf&amp;gt;] mntput_no_expire+0xbf/0x110
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffff811a2c2b&amp;gt;] sys_umount+0x7b/0x3a0
Jun 30 16:16:28 centos6-x kernel: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I am checking the recent maloo test reports to see when the failure started.&lt;/p&gt;</comment>
                            <comment id="87897" author="emoly.liu" created="Tue, 1 Jul 2014 15:36:56 +0000"  >&lt;p&gt;I checked and vetted recent two months sanity.sh test_132 failure logs and the logs show that there are three kinds of timeout failure.&lt;/p&gt;

&lt;p&gt;1) &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4062&quot; title=&quot;sanity test_132: MGS is waiting for obd_unlinked_exports more than 512 seconds&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4062&quot;&gt;&lt;del&gt;LU-4062&lt;/del&gt;&lt;/a&gt;/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3665&quot; title=&quot;obdfilter-survey test_3a: unmount stuck in obd_exports_barrier()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3665&quot;&gt;&lt;del&gt;LU-3665&lt;/del&gt;&lt;/a&gt;, as Oleg said, its signature is &quot;XXX is waiting for obd_unlinked_exports more than xxx seconds&quot;. It really does happen from time to time, even in yesterday&apos;s test on the latest master branch &lt;a href=&quot;https://testing.hpdd.intel.com/test_logs/1a2b5a56-00e2-11e4-b331-5254006e85c2/show_text&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_logs/1a2b5a56-00e2-11e4-b331-5254006e85c2/show_text&lt;/a&gt; .&lt;br/&gt;
This issue will be probably fixed by the patch &lt;a href=&quot;http://review.whamcloud.com/#/c/9350&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9350&lt;/a&gt; .&lt;/p&gt;

&lt;p&gt;2) &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt;, as Andreas said, its signature is &#8220;(osp_dev.c:858:osp_device_free()) } header@&#8221; and&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;12:34:53:LustreError: 7776:0:(osp_dev.c:858:osp_device_free()) } header@ffff8800607cee00
12:34:53:LustreError: 7776:0:(lu_object.c:1198:lu_device_fini()) ASSERTION( atomic_read(&amp;amp;d-&amp;gt;ld_ref) == 0 ) failed: Refcount is 1
12:34:53:LustreError: 11252:0:(mdt_handler.c:4376:mdt_fini()) ASSERTION( atomic_read(&amp;amp;d-&amp;gt;ld_ref) == 0 ) failed: 
12:34:53:LustreError: 11252:0:(mdt_handler.c:4376:mdt_fini()) LBUG
12:34:53:Pid: 11252, comm: umount
12:34:53:
12:34:53:Call Trace:
12:34:53: [&amp;lt;ffffffffa048e895&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
12:34:53: [&amp;lt;ffffffffa048ee97&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
12:34:53: [&amp;lt;ffffffffa0ebccaf&amp;gt;] mdt_device_fini+0xc8f/0xcd0 [mdt]
12:34:53: [&amp;lt;ffffffffa05c3f5d&amp;gt;] ? class_disconnect_exports+0x17d/0x2f0 [obdclass]
12:34:53: [&amp;lt;ffffffffa05e6443&amp;gt;] class_cleanup+0x573/0xd30 [obdclass]
12:34:53: [&amp;lt;ffffffffa05c1836&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
12:34:53: [&amp;lt;ffffffffa05e816a&amp;gt;] class_process_config+0x156a/0x1ad0 [obdclass]
12:34:53: [&amp;lt;ffffffffa0499478&amp;gt;] ? libcfs_log_return+0x28/0x40 [libcfs]
12:34:53: [&amp;lt;ffffffffa05e0549&amp;gt;] ? lustre_cfg_new+0x309/0x680 [obdclass]
12:34:53: [&amp;lt;ffffffffa05e8849&amp;gt;] class_manual_cleanup+0x179/0x6f0 [obdclass]
12:34:53: [&amp;lt;ffffffffa05c1836&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
12:34:53: [&amp;lt;ffffffffa0625f09&amp;gt;] server_put_super+0x8f9/0xe50 [obdclass]
12:34:53: [&amp;lt;ffffffff8118af0b&amp;gt;] generic_shutdown_super+0x5b/0xe0
12:34:53: [&amp;lt;ffffffff8118aff6&amp;gt;] kill_anon_super+0x16/0x60
12:34:53: [&amp;lt;ffffffffa05ea726&amp;gt;] lustre_kill_super+0x36/0x60 [obdclass]
12:34:53: [&amp;lt;ffffffff8118b797&amp;gt;] deactivate_super+0x57/0x80
12:34:53: [&amp;lt;ffffffff811aa79f&amp;gt;] mntput_no_expire+0xbf/0x110
12:34:53: [&amp;lt;ffffffff811ab2eb&amp;gt;] sys_umount+0x7b/0x3a0
12:34:53: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Seems this problem has gone since June-15.&lt;/p&gt;

&lt;p&gt;3) &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5277&quot; title=&quot;sanity test_132: mdt_build_target_list(), unable to handle kernel NULL pointer dereference&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5277&quot;&gt;&lt;del&gt;LU-5277&lt;/del&gt;&lt;/a&gt;, I just file this in a new ticket. Its signature is &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;03:45:20:BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
03:45:20:IP: [&amp;lt;ffffffffa108b30b&amp;gt;] mdt_build_target_list+0xfb/0x6a0 [mdt]
03:45:20:PGD 0 
03:45:20:Oops: 0000 [#1] SMP 
03:45:20:last sysfs file: /sys/devices/system/cpu/online
03:45:20:CPU 1 
03:45:20:Modules linked in: osp(U) lod(U) mdt(U) mdd(U) mgs(U) mgc(U) osd_zfs(U) lquota(U) lustre(U) lov(U) osc(U) mdc(U) fid(U) fld(U) ptlrpc(U) obdclass(U) ksocklnd(U) scollective(U) gossip(U) lnet(U) lvfs(U) sha512_generic sha256_generic libcfs(U) nfsd exportfs autofs4 nfs lockd fscache auth_rpcgss nfs_acl sunrpc ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core zfs(P)(U) zcommon(P)(U) znvpair(P)(U) zavl(P)(U) zunicode(P)(U) spl(U) zlib_deflate microcode virtio_balloon 8139too 8139cp mii i2c_piix4 i2c_core ext3 jbd mbcache virtio_blk virtio_pci virtio_ring virtio pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: llog_test]
03:45:20:
03:45:20:Pid: 16913, comm: mdt00_001 Tainted: P           ---------------    2.6.32-358.6.2.el6_lustre.g1624f5c.x86_64 #1 Red Hat KVM
03:45:20:RIP: 0010:[&amp;lt;ffffffffa108b30b&amp;gt;]  [&amp;lt;ffffffffa108b30b&amp;gt;] mdt_build_target_list+0xfb/0x6a0 [mdt]
03:45:20:RSP: 0018:ffff88006e32bba0  EFLAGS: 00010246
03:45:20:RAX: ffff88006c779ba0 RBX: ffff88002ea7a5c0 RCX: fffffffffffffffe
03:45:20:RDX: 0000000000000000 RSI: ffff88006c779b18 RDI: ffff88002ea7a5c0
03:45:20:RBP: ffff88006e32bc00 R08: ffff880048a91720 R09: 0000000000000000
03:45:20:R10: ffff88002d578800 R11: 0000000000000418 R12: 0000000000000000
03:45:20:R13: d4f7928dd528ca79 R14: ffff88006c779b18 R15: ffff88002dd6b000
03:45:20:FS:  00007f7d60754700(0000) GS:ffff880002300000(0000) knlGS:0000000000000000
03:45:20:CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
03:45:20:CR2: 0000000000000030 CR3: 0000000001a85000 CR4: 00000000000006e0
03:45:20:DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
03:45:20:DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
03:45:20:Process mdt00_001 (pid: 16913, threadinfo ffff88006e32a000, task ffff88007005f500)
03:45:20:Stack:
03:45:20: ffff88006e32bbd0 0000000000000000 ffff88006e32bc18 ffffffffa1089e50
03:45:20:&amp;lt;d&amp;gt; ffffffffffffffff ffff88002dd6b000 ffff88006e32bc00 ffff88004b9d17d8
03:45:20:&amp;lt;d&amp;gt; ffff88002dd6b378 d4f7928dd528ca79 ffff88002dd6b000 0000000000000000
03:45:20:Call Trace:
03:45:20: [&amp;lt;ffffffffa1089e50&amp;gt;] ? mdt_corpc_add_shard+0x0/0x5d0 [mdt]
03:45:20: [&amp;lt;ffffffffa108ef76&amp;gt;] mdt_corpc_release_epochs+0x136/0x6c0 [mdt]
03:45:20: [&amp;lt;ffffffffa108f5fb&amp;gt;] mdt_container_close+0xfb/0x200 [mdt]
03:45:20: [&amp;lt;ffffffffa106e0de&amp;gt;] mdt_mfd_close+0x2de/0x700 [mdt]
03:45:20: [&amp;lt;ffffffffa106e9e2&amp;gt;] mdt_done_writing+0x4e2/0xd50 [mdt]
03:45:20: [&amp;lt;ffffffffa103df0d&amp;gt;] ? mdt_unpack_req_pack_rep+0x4d/0x4d0 [mdt]
03:45:20: [&amp;lt;ffffffffa0983d9c&amp;gt;] ? lustre_msg_get_version+0x8c/0x100 [ptlrpc]
03:45:20: [&amp;lt;ffffffffa1047b98&amp;gt;] mdt_handle_common+0x648/0x1690 [mdt]
03:45:20: [&amp;lt;ffffffffa10818f5&amp;gt;] mds_regular_handle+0x15/0x20 [mdt]
03:45:20: [&amp;lt;ffffffffa0993558&amp;gt;] ptlrpc_server_handle_request+0x398/0xc60 [ptlrpc]
03:45:20: [&amp;lt;ffffffffa05e65de&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
03:45:20: [&amp;lt;ffffffffa05f8c5f&amp;gt;] ? lc_watchdog_touch+0x6f/0x170 [libcfs]
03:45:20: [&amp;lt;ffffffffa098a8b9&amp;gt;] ? ptlrpc_wait_event+0xa9/0x290 [ptlrpc]
03:45:20: [&amp;lt;ffffffff81055ab3&amp;gt;] ? __wake_up+0x53/0x70
03:45:20: [&amp;lt;ffffffffa09948ee&amp;gt;] ptlrpc_main+0xace/0x1710 [ptlrpc]
03:45:20: [&amp;lt;ffffffffa0993e20&amp;gt;] ? ptlrpc_main+0x0/0x1710 [ptlrpc]
03:45:20: [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
03:45:20: [&amp;lt;ffffffffa0993e20&amp;gt;] ? ptlrpc_main+0x0/0x1710 [ptlrpc]
03:45:20: [&amp;lt;ffffffffa0993e20&amp;gt;] ? ptlrpc_main+0x0/0x1710 [ptlrpc]
03:45:20: [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Seems this problem also has gone since June-17.&lt;/p&gt;

&lt;p&gt;The frequency of these issues is 1) &amp;gt; 3) &amp;gt; 2). BTW, unfortunately, many tests have no test logs, so I can&apos;t tell which situation they belong to.&lt;/p&gt;</comment>
                            <comment id="87943" author="adilger" created="Wed, 2 Jul 2014 06:51:57 +0000"  >&lt;p&gt;Emoly, thank you for your detailed investigation. When you write that the problem #2 and #3 &quot;have gone since June NN&quot; do you mean they have &quot;gone away&quot; == no longer happening, or &quot;have gone on&quot; == started on?&lt;/p&gt;

&lt;p&gt;Could you check if patches were landed on those days (or a day before) that might have caused these failures?  Even better is to find if the very first failures were happening on a patch before it was landed to master.  You can check &lt;a href=&quot;https://old-testing.hpdd.intel.com/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://old-testing.hpdd.intel.com/&lt;/a&gt; for the old failure logs. &lt;/p&gt;</comment>
                            <comment id="87948" author="emoly.liu" created="Wed, 2 Jul 2014 07:12:23 +0000"  >&lt;p&gt;Andreas, sorry for my unclear expression, I mean problem #2 and #3 have gone away since the middle of June.&lt;/p&gt;

&lt;p&gt;I will check those old failure logs and see if the failure is related to some patches landed on those days, and then give an update.&lt;/p&gt;</comment>
                            <comment id="88067" author="emoly.liu" created="Thu, 3 Jul 2014 04:09:50 +0000"  >&lt;p&gt;Since problem #3 &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5277&quot; title=&quot;sanity test_132: mdt_build_target_list(), unable to handle kernel NULL pointer dereference&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5277&quot;&gt;&lt;del&gt;LU-5277&lt;/del&gt;&lt;/a&gt; only happened in a daos-regression group,  I re-filed it to FF-273. So we can only force on problem #2.&lt;/p&gt;</comment>
                            <comment id="88083" author="emoly.liu" created="Thu, 3 Jul 2014 09:33:02 +0000"  >&lt;p&gt;I checked the test logs one by one and only found two instances with problem #2 (I mean the console logs show &#8220;(osp_dev.c:858:osp_device_free()) } header@&#8221; obviously).&lt;br/&gt;
They are &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/f19d7516-f433-11e3-9db1-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/f19d7516-f433-11e3-9db1-5254006e85c2&lt;/a&gt; and &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/1a7d3012-f4c9-11e3-b233-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/1a7d3012-f4c9-11e3-b233-5254006e85c2&lt;/a&gt; , which are from the same build &lt;a href=&quot;https://build.hpdd.intel.com/job/lustre-reviews/24535&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://build.hpdd.intel.com/job/lustre-reviews/24535&lt;/a&gt; triggered by wangdi. &lt;/p&gt;

&lt;p&gt;If I am right, problem #1 is the only problem.&lt;/p&gt;</comment>
                            <comment id="88174" author="di.wang" created="Fri, 4 Jul 2014 04:29:56 +0000"  >&lt;p&gt;Hmm, problem #2 (LBUG during umount mdt ) only happened 2 times and it seems it had not happen since June 15th, so it was probably already being fixed. &lt;/p&gt;

&lt;p&gt;I checked the all of test_132 timeout status, it seems most of them happened in ZFS and umount -f ost, maybe some one familiar with OST stack should have a look, Thanks.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;05:18:16:Lustre: 3931:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213483/real 1404213483]  req@ffff88006d539c00 x1472420846307548/t0(0) o400-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 224/224 e 0 to 1 dl 1404213490 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:18:16:LustreError: 166-1: MGC10.1.6.21@tcp: Connection to MGS (at 10.1.6.21@tcp) was lost; in progress operations using this service will fail
05:18:16:Lustre: DEBUG MARKER: ! zpool list -H lustre-ost1 &amp;gt;/dev/null 2&amp;gt;&amp;amp;1 ||
05:25:51:			grep -q ^lustre-ost1/ /proc/mounts ||
05:25:51:			zpool export  lustre-ost1
05:25:51:Lustre: DEBUG MARKER: grep -c /mnt/ost2&apos; &apos; /proc/mounts
05:25:52:Lustre: DEBUG MARKER: umount -d -f /mnt/ost2
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213490/real 1404213490]  req@ffff8800683eb400 x1472420846307568/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1404213496 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:25:52:Lustre: lustre-OST0001 is waiting for obd_unlinked_exports more than 8 seconds. The obd refcount = 5. Is it stuck?
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213500/real 1404213500]  req@ffff8800683eb000 x1472420846307572/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1404213511 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:25:52:Lustre: lustre-OST0001 is waiting for obd_unlinked_exports more than 16 seconds. The obd refcount = 5. Is it stuck?
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213515/real 1404213515]  req@ffff88002eb48c00 x1472420846307576/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1404213531 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:25:52:Lustre: lustre-OST0001 is waiting for obd_unlinked_exports more than 32 seconds. The obd refcount = 5. Is it stuck?
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213535/real 1404213535]  req@ffff88002e895c00 x1472420846307580/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1404213556 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213560/real 1404213560]  req@ffff88006d82a000 x1472420846307584/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1404213585 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213590/real 1404213590]  req@ffff88006a6d6c00 x1472420846307588/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1404213615 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:25:52:Lustre: lustre-OST0001 is waiting for obd_unlinked_exports more than 64 seconds. The obd refcount = 5. Is it stuck?
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213650/real 1404213650]  req@ffff88002e895c00 x1472420846307596/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1404213675 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 1 previous similar message
05:25:52:Lustre: lustre-OST0001 is waiting for obd_unlinked_exports more than 128 seconds. The obd refcount = 5. Is it stuck?
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213740/real 1404213740]  req@ffff88002eb48c00 x1472420846307608/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1404213765 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 2 previous similar messages
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1404213880/real 1404213880]  req@ffff88002e895c00 x1472420846307628/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1404213905 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:25:52:Lustre: 3930:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 4 previous similar messages
05:25:52:INFO: task umount:8756 blocked for more than 120 seconds.
05:25:52:      Tainted: P           ---------------    2.6.32-431.17.1.el6_lustre.gdc86085.x86_64 #1
05:25:52:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
05:25:52:umount        D 0000000000000000     0  8756   8755 0x00000080
05:25:52: ffff88006f099aa8 0000000000000082 ffff88006f099a08 ffff880071fcec00
05:31:55: ffffffffa079de77 0000000000000000 ffff88007142c184 ffffffffa079de77
05:31:55: ffff8800792a1ab8 ffff88006f099fd8 000000000000fbc8 ffff8800792a1ab8
05:31:55:Call Trace:
05:31:55: [&amp;lt;ffffffff81528e82&amp;gt;] schedule_timeout+0x192/0x2e0
05:31:55: [&amp;lt;ffffffff81083e90&amp;gt;] ? process_timeout+0x0/0x10
05:31:55: [&amp;lt;ffffffffa0720a6b&amp;gt;] obd_exports_barrier+0xab/0x180 [obdclass]
05:31:55: [&amp;lt;ffffffffa0f7476f&amp;gt;] ofd_device_fini+0x5f/0x260 [ofd]
05:31:55: [&amp;lt;ffffffffa07475d3&amp;gt;] class_cleanup+0x573/0xd30 [obdclass]
05:31:55: [&amp;lt;ffffffffa0722846&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
05:31:55: [&amp;lt;ffffffffa07492fa&amp;gt;] class_process_config+0x156a/0x1ad0 [obdclass]
05:31:55: [&amp;lt;ffffffffa05fa488&amp;gt;] ? libcfs_log_return+0x28/0x40 [libcfs]
05:31:55: [&amp;lt;ffffffffa07416d9&amp;gt;] ? lustre_cfg_new+0x309/0x680 [obdclass]
05:31:55: [&amp;lt;ffffffffa07499d9&amp;gt;] class_manual_cleanup+0x179/0x6f0 [obdclass]
05:31:55: [&amp;lt;ffffffffa0722846&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
05:31:55: [&amp;lt;ffffffffa0787209&amp;gt;] server_put_super+0x8f9/0xe50 [obdclass]
05:31:55: [&amp;lt;ffffffff8118af0b&amp;gt;] generic_shutdown_super+0x5b/0xe0
05:31:55: [&amp;lt;ffffffff8118aff6&amp;gt;] kill_anon_super+0x16/0x60
05:31:55: [&amp;lt;ffffffffa074b8b6&amp;gt;] lustre_kill_super+0x36/0x60 [obdclass]
05:31:55: [&amp;lt;ffffffff8118b797&amp;gt;] deactivate_super+0x57/0x80
05:31:55: [&amp;lt;ffffffff811aa79f&amp;gt;] mntput_no_expire+0xbf/0x110
05:31:55: [&amp;lt;ffffffff811ab2eb&amp;gt;] sys_umount+0x7b/0x3a0
05:31:55: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
05:31:55:Lustre: lustre-OST0001 is waiting for obd_unlinked_exports more than 256 seconds. The obd refcount = 5. Is it stuck?
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

</comment>
                            <comment id="88205" author="adilger" created="Fri, 4 Jul 2014 17:56:21 +0000"  >&lt;p&gt;Di, the reason that this failure was not hit since June 15 is because that is the last time your patch &lt;a href=&quot;http://review.whamcloud.com/10673&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10673&lt;/a&gt; for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5163&quot; title=&quot;(lu_object.h:852:lu_object_attr()) ASSERTION( ((o)-&amp;gt;lo_header-&amp;gt;loh_attr &amp;amp; LOHA_EXISTS) != 0 ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5163&quot;&gt;&lt;del&gt;LU-5163&lt;/del&gt;&lt;/a&gt; was tested.  This problem has only been hit when testing that patch.&lt;/p&gt;</comment>
                            <comment id="88225" author="di.wang" created="Sat, 5 Jul 2014 05:47:31 +0000"  >&lt;p&gt;Ah, ok. I will fix the patch 10673 in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5163&quot; title=&quot;(lu_object.h:852:lu_object_attr()) ASSERTION( ((o)-&amp;gt;lo_header-&amp;gt;loh_attr &amp;amp; LOHA_EXISTS) != 0 ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5163&quot;&gt;&lt;del&gt;LU-5163&lt;/del&gt;&lt;/a&gt;.  Alex, Could you please comment on this one, not so familiar with ZFS.&lt;/p&gt;</comment>
                            <comment id="88261" author="bzzz" created="Mon, 7 Jul 2014 13:51:20 +0000"  >&lt;p&gt;I checked on log - &lt;a href=&quot;https://testing.hpdd.intel.com/test_logs/846317de-f434-11e3-9db1-5254006e85c2/show_text:&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_logs/846317de-f434-11e3-9db1-5254006e85c2/show_text:&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;2:34:53:LustreError: 7776:0:(osp_dev.c:858:osp_device_free()) header@ffff88006a3c3980[0x0, 1, &lt;span class=&quot;error&quot;&gt;&amp;#91;0x1:0x0:0x0&amp;#93;&lt;/span&gt; hash exist]&lt;/p&gt;
{
12:34:53:LustreError: 7776:0:(osp_dev.c:858:osp_device_free()) ....local_storage@ffff88006a3c39d0
12:34:53:LustreError: 7776:0:(osp_dev.c:858:osp_device_free()) ....osd-ldiskfs@ffff880061e9dc80osd-ldiskfs-object@ffff880061e9dc80(i:ffff8800578a97e8:144/3738986019)[plain]
12:34:53:LustreError: 7776:0:(osp_dev.c:858:osp_device_free()) }
&lt;p&gt; header@ffff88006a3c3980&lt;/p&gt;

&lt;p&gt;doesn&apos;t seem to be specific to ZFS backend?&lt;/p&gt;</comment>
                            <comment id="88337" author="di.wang" created="Mon, 7 Jul 2014 17:38:01 +0000"  >&lt;blockquote&gt;
&lt;p&gt;2:34:53:LustreError: 7776:0:(osp_dev.c:858:osp_device_free()) header@ffff88006a3c3980[0x0, 1, &lt;span class=&quot;error&quot;&gt;&amp;#91;0x1:0x0:0x0&amp;#93;&lt;/span&gt; hash exist]&lt;br/&gt;
12:34:53:LustreError: 7776:0:(osp_dev.c:858:osp_device_free()) ....local_storage@ffff88006a3c39d0 12:34:53:LustreError: 7776:0:(osp_dev.c:858:osp_device_free()) ....osd-ldiskfs@ffff880061e9dc80osd-ldiskfs-object@ffff880061e9dc80(i:ffff8800578a97e8:144/3738986019)&lt;span class=&quot;error&quot;&gt;&amp;#91;plain&amp;#93;&lt;/span&gt; 12:34:53:LustreError: 7776:0:(osp_dev.c:858:osp_device_free()) &lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Oh, this is caused by my patch &lt;a href=&quot;http://review.whamcloud.com/#/c/10673/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10673/&lt;/a&gt;  , which is not being landed yet, I will update the patch anyway.&lt;/p&gt;

&lt;p&gt;But this ticket is about umount on ZFS, &lt;a href=&quot;https://testing.hpdd.intel.com/sub_tests/query?utf8=&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/sub_tests/query?utf8=&lt;/a&gt;&#10003;&amp;amp;test_set%5Btest_set_script_id%5D=f9516376-32bc-11e0-aaee-52540025f9ae&amp;amp;sub_test%5Bsub_test_script_id%5D=12fcb374-32be-11e0-b685-52540025f9ae&amp;amp;sub_test%5Bstatus%5D=TIMEOUT&amp;amp;sub_test%5Bquery_bugs%5D=&amp;amp;test_session%5Btest_host%5D=&amp;amp;test_session%5Btest_group%5D=&amp;amp;test_session%5Buser_id%5D=&amp;amp;test_session%5Bquery_date%5D=&amp;amp;test_session%5Bquery_recent_period%5D=&amp;amp;test_node%5Bos_type_id%5D=&amp;amp;test_node%5Bdistribution_type_id%5D=&amp;amp;test_node%5Barchitecture_type_id%5D=&amp;amp;test_node%5Bfile_system_type_id%5D=&amp;amp;test_node%5Blustre_branch_id%5D=&amp;amp;test_node_network%5Bnetwork_type_id%5D=&amp;amp;commit=Update+results &lt;/p&gt;

&lt;p&gt;please check.&lt;/p&gt;</comment>
                            <comment id="88404" author="bzzz" created="Tue, 8 Jul 2014 05:22:25 +0000"  >&lt;p&gt;this is &quot;known&quot; issue with no resolution yet &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/sad.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;

&lt;p&gt;ll_ost00_004  D 0000000000000001     0 26767      2 0x00000080&lt;br/&gt;
 ffff88007946f970 0000000000000046 000000016fef8aa0 0000000000000001&lt;br/&gt;
 ffff88006dcc01f0 0000000000000082 ffff88007946f950 ffff88007607bd80&lt;br/&gt;
 ffff88006fef9058 ffff88007946ffd8 000000000000fbc8 ffff88006fef9058&lt;br/&gt;
Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109b14e&amp;gt;&amp;#93;&lt;/span&gt; ? prepare_to_wait_exclusive+0x4e/0x80&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa014347d&amp;gt;&amp;#93;&lt;/span&gt; cv_wait_common+0xed/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109af00&amp;gt;&amp;#93;&lt;/span&gt; ? autoremove_wake_function+0x0/0x40&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa01434e5&amp;gt;&amp;#93;&lt;/span&gt; __cv_wait+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0242f7b&amp;gt;&amp;#93;&lt;/span&gt; txg_wait_open+0x7b/0xa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa020a95e&amp;gt;&amp;#93;&lt;/span&gt; dmu_tx_wait+0x29e/0x2b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8152965e&amp;gt;&amp;#93;&lt;/span&gt; ? mutex_lock+0x1e/0x50&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa020aa01&amp;gt;&amp;#93;&lt;/span&gt; dmu_tx_assign+0x91/0x490 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e9656c&amp;gt;&amp;#93;&lt;/span&gt; osd_trans_start+0x9c/0x410 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f9e79c&amp;gt;&amp;#93;&lt;/span&gt; ofd_trans_start+0x7c/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ofd&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f9f4c3&amp;gt;&amp;#93;&lt;/span&gt; ofd_object_destroy+0x203/0x680 &lt;span class=&quot;error&quot;&gt;&amp;#91;ofd&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f9afed&amp;gt;&amp;#93;&lt;/span&gt; ofd_destroy_by_fid+0x35d/0x620 &lt;span class=&quot;error&quot;&gt;&amp;#91;ofd&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0973e80&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_blocking_ast+0x0/0x180 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09754e0&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_completion_ast+0x0/0x930 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f948d2&amp;gt;&amp;#93;&lt;/span&gt; ofd_destroy_hdl+0x2e2/0xb80 &lt;span class=&quot;error&quot;&gt;&amp;#91;ofd&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a01a7c&amp;gt;&amp;#93;&lt;/span&gt; tgt_request_handle+0x23c/0xac0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09b129a&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xd1a/0x1980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09b0580&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;/p&gt;


&lt;p&gt; ffff88006fe55ba0 0000000000000046 00000000ffffffff 000015e4f4d9c106&lt;br/&gt;
 ffff88006fe55b10 ffff88007034b910 000000000045358e ffffffffac457b12&lt;br/&gt;
 ffff8800797a2638 ffff88006fe55fd8 000000000000fbc8 ffff8800797a2638&lt;br/&gt;
Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a6d01&amp;gt;&amp;#93;&lt;/span&gt; ? ktime_get_ts+0xb1/0xf0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff815287f3&amp;gt;&amp;#93;&lt;/span&gt; io_schedule+0x73/0xc0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa014341c&amp;gt;&amp;#93;&lt;/span&gt; cv_wait_common+0x8c/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109af00&amp;gt;&amp;#93;&lt;/span&gt; ? autoremove_wake_function+0x0/0x40&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa01434a8&amp;gt;&amp;#93;&lt;/span&gt; __cv_wait_io+0x18/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa02890ab&amp;gt;&amp;#93;&lt;/span&gt; zio_wait+0xfb/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa021ebe3&amp;gt;&amp;#93;&lt;/span&gt; dsl_pool_sync+0x2b3/0x3f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0236e4b&amp;gt;&amp;#93;&lt;/span&gt; spa_sync+0x40b/0xa60 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0243916&amp;gt;&amp;#93;&lt;/span&gt; txg_sync_thread+0x2e6/0x510 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810591a9&amp;gt;&amp;#93;&lt;/span&gt; ? set_user_nice+0xc9/0x130&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0243630&amp;gt;&amp;#93;&lt;/span&gt; ? txg_sync_thread+0x0/0x510 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;there are many duplicates, actually.. for exaple, &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4716&quot; title=&quot;replay-ost-single test_5: stuck in dbuf_read-&amp;gt;zio_wait&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4716&quot;&gt;&lt;del&gt;LU-4716&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;we discussed this few times with Brian B., but the root cause is still unclear..&lt;/p&gt;</comment>
                            <comment id="88753" author="di.wang" created="Thu, 10 Jul 2014 18:49:16 +0000"  >&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;Andreas Dilger&amp;#93;&lt;/span&gt; Deleted this comment because this problem is actually &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3665&quot; title=&quot;obdfilter-survey test_3a: unmount stuck in obd_exports_barrier()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3665&quot;&gt;&lt;del&gt;LU-3665&lt;/del&gt;&lt;/a&gt; and didn&apos;t want this incorrectly appearing in search results.&lt;/p&gt;</comment>
                            <comment id="88754" author="di.wang" created="Thu, 10 Jul 2014 18:54:28 +0000"  >&lt;p&gt;Fortunately, these two failures only happened once since July, so maybe it should be remark it as major?&lt;/p&gt;</comment>
                            <comment id="88965" author="yujian" created="Mon, 14 Jul 2014 18:09:26 +0000"  >&lt;p&gt;More instance on master branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/33f21012-098c-11e4-a512-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/33f21012-098c-11e4-a512-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="89356" author="utopiabound" created="Thu, 17 Jul 2014 15:59:49 +0000"  >&lt;p&gt;It does happen more frequently on ZFS eg:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/b575d130-0dc1-11e4-b3f5-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/b575d130-0dc1-11e4-b3f5-5254006e85c2&lt;/a&gt;&lt;br/&gt;
but it does also happen on DNE:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/f019ab4a-0ae2-11e4-8ed2-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/f019ab4a-0ae2-11e4-8ed2-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Failure rate for review-zfs is 22% over the last week&lt;/p&gt;</comment>
                            <comment id="90251" author="di.wang" created="Mon, 28 Jul 2014 23:08:26 +0000"  >&lt;p&gt;Hmm, the failure on DNE is mostly about amount MGS&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Jul 13 10:08:02 onyx-51vm3 kernel: Lustre: MGS is waiting for obd_unlinked_exports more than 64 seconds. The obd refcount = 5. Is it stuck?
Jul 13 10:09:08 onyx-51vm3 kernel: Lustre: 3023:0:(client.c:1926:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1405271323/real 1405271323]  req@ffff88005f6fe400 x1473528013665456/t0(0) o250-&amp;gt;MGC10.2.5.30@tcp@0@lo:26/25 lens 400/544 e 0 to 1 dl 1405271348 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
Jul 13 10:09:08 onyx-51vm3 kernel: Lustre: 3023:0:(client.c:1926:ptlrpc_expire_one_request()) Skipped 2 previous similar messages
Jul 13 10:09:16 onyx-51vm3 kernel: LustreError: 137-5: lustre-MDT0000_UUID: not available for connect from 10.2.5.31@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
Jul 13 10:09:16 onyx-51vm3 kernel: LustreError: Skipped 349 previous similar messages
Jul 13 10:10:05 onyx-51vm3 kernel: INFO: task umount:7320 blocked for more than 120 seconds.
Jul 13 10:10:05 onyx-51vm3 kernel:      Not tainted 2.6.32-431.20.3.el6_lustre.x86_64 #1
Jul 13 10:10:05 onyx-51vm3 kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Jul 13 10:10:05 onyx-51vm3 kernel: umount        D 0000000000000001     0  7320   7319 0x00000080
Jul 13 10:10:05 onyx-51vm3 kernel: ffff8800627c1aa8 0000000000000086 0000000000000000 ffff88007bd0d400
Jul 13 10:10:05 onyx-51vm3 kernel: ffffffffa063cf3f 0000000000000000 ffff880055446144 ffffffffa063cf3f
Jul 13 10:10:05 onyx-51vm3 kernel: ffff8800627c7af8 ffff8800627c1fd8 000000000000fbc8 ffff8800627c7af8
Jul 13 10:10:05 onyx-51vm3 kernel: Call Trace:
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffff81529ac2&amp;gt;] schedule_timeout+0x192/0x2e0
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffff81083f30&amp;gt;] ? process_timeout+0x0/0x10
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa05bfa8b&amp;gt;] obd_exports_barrier+0xab/0x180 [obdclass]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa0dea57e&amp;gt;] mgs_device_fini+0xfe/0x590 [mgs]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa05e6683&amp;gt;] class_cleanup+0x573/0xd30 [obdclass]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa05c1866&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa05e83aa&amp;gt;] class_process_config+0x156a/0x1ad0 [obdclass]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa05e074b&amp;gt;] ? lustre_cfg_new+0x2cb/0x680 [obdclass]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa05e8a89&amp;gt;] class_manual_cleanup+0x179/0x6f0 [obdclass]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa05c1866&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa062632b&amp;gt;] server_put_super+0x96b/0xe50 [obdclass]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffff8118b0cb&amp;gt;] generic_shutdown_super+0x5b/0xe0
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffff8118b1b6&amp;gt;] kill_anon_super+0x16/0x60
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffffa05ea966&amp;gt;] lustre_kill_super+0x36/0x60 [obdclass]
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffff8118b957&amp;gt;] deactivate_super+0x57/0x80
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffff811ab35f&amp;gt;] mntput_no_expire+0xbf/0x110
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffff811abeab&amp;gt;] sys_umount+0x7b/0x3a0
Jul 13 10:10:05 onyx-51vm3 kernel: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
Jul 13 10:10:10 onyx-51vm3 kernel: Lustre: MGS is waiting for obd_unlinked_exports more than 128 seconds. The obd refcount = 5. Is it stuck?
Jul 13 10:11:38 onyx-51vm3 kernel: Lustre: 3023:0:(client.c:1926:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1405271473/real 1405271473]  req@fff
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Probably deserve a new ticket,  but it only happened once in July, so probably not serious enough to be a critical ticket here?&lt;/p&gt;</comment>
                            <comment id="90252" author="di.wang" created="Mon, 28 Jul 2014 23:14:04 +0000"  >&lt;p&gt;Hmm, this umount mgs failure only happens for &lt;a href=&quot;http://review.whamcloud.com/#/c/10249/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10249/&lt;/a&gt;, so I am not sure this is related with that patch or not.  I will create a new ticket if I saw more failures.&lt;/p&gt;</comment>
                            <comment id="90368" author="pjones" created="Tue, 29 Jul 2014 19:35:27 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please look into this issue?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="90414" author="bobijam" created="Wed, 30 Jul 2014 03:10:05 +0000"  >&lt;p&gt;failure with signature of &quot;waiting for obd_unlinked_exports more than XX&quot; is dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3665&quot; title=&quot;obdfilter-survey test_3a: unmount stuck in obd_exports_barrier()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3665&quot;&gt;&lt;del&gt;LU-3665&lt;/del&gt;&lt;/a&gt;, and its fix patch is tracked at &lt;a href=&quot;http://review.whamcloud.com/#/c/9350&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9350&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="90539" author="utopiabound" created="Thu, 31 Jul 2014 14:59:24 +0000"  >&lt;p&gt;I agree the symptom is the same, but fixing obdfilter-survey (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3665&quot; title=&quot;obdfilter-survey test_3a: unmount stuck in obd_exports_barrier()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3665&quot;&gt;&lt;del&gt;LU-3665&lt;/del&gt;&lt;/a&gt;) doesn&apos;t seem like it will fix the failure in sanity/132.  &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3665&quot; title=&quot;obdfilter-survey test_3a: unmount stuck in obd_exports_barrier()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3665&quot;&gt;&lt;del&gt;LU-3665&lt;/del&gt;&lt;/a&gt; is caused by an extra objects created and not cleaned up via &quot;lctl create&quot;&lt;/p&gt;</comment>
                            <comment id="91040" author="bobijam" created="Thu, 7 Aug 2014 06:25:17 +0000"  >&lt;p&gt;from the 1st report &lt;a href=&quot;http://maloo.whamcloud.com/test_sets/e5783778-f887-11e3-b13a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://maloo.whamcloud.com/test_sets/e5783778-f887-11e3-b13a-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;02:17:13:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252206/real 1403252206]  req@ffff880045c20800 x1471410848609332/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1403252212 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
02:17:13:LustreError: 5280:0:(mgc_request.c:516:do_requeue()) failed processing log: -5
02:17:13:LustreError: 5280:0:(mgc_request.c:516:do_requeue()) Skipped 15 previous similar messages
02:17:13:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252206/real 1403252206]  req@ffff880065b40400 x1471410848609204/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0007@10.1.6.21@tcp:12/10 lens 400/544 e 0 to 1 dl 1403252217 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
02:17:13:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252206/real 1403252206]  req@ffff88006a05d000 x1471410848609284/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0004@10.1.6.21@tcp:12/10 lens 400/544 e 0 to 1 dl 1403252222 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
02:17:13:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 4 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252216/real 1403252216]  req@ffff880065ed6400 x1471410848609596/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1403252227 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 2 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252221/real 1403252221]  req@ffff880074a52400 x1471410848609744/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0003@10.1.6.21@tcp:12/10 lens 400/544 e 0 to 1 dl 1403252237 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252241/real 1403252241]  req@ffff880042557800 x1471410848610120/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0003@10.1.6.21@tcp:12/10 lens 400/544 e 0 to 1 dl 1403252262 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 8 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252276/real 1403252276]  req@ffff880073b55400 x1471410848610676/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1403252301 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 13 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252341/real 1403252341]  req@ffff880074e19400 x1471410848612092/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0006@10.1.6.21@tcp:12/10 lens 400/544 e 0 to 1 dl 1403252366 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 18 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252476/real 1403252476]  req@ffff8800672e3800 x1471410848614808/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0003@10.1.6.21@tcp:12/10 lens 400/544 e 0 to 1 dl 1403252501 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 38 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403252746/real 1403252746]  req@ffff88006633c800 x1471410848620304/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0003@10.1.6.21@tcp:12/10 lens 400/544 e 0 to 1 dl 1403252771 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 80 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403253266/real 1403253266]  req@ffff880027636c00 x1471410848630652/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1403253291 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 157 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403253866/real 1403253866]  req@ffff880074dd0000 x1471410848642692/t0(0) o250-&amp;gt;MGC10.1.6.21@tcp@10.1.6.21@tcp:26/25 lens 400/544 e 0 to 1 dl 1403253891 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 179 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403254471/real 1403254471]  req@ffff880074dc6c00 x1471410848654508/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0004@10.1.6.21@tcp:12/10 lens 400/544 e 0 to 1 dl 1403254496 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) Skipped 181 previous similar messages
03:05:16:Lustre: 5258:0:(client.c:1924:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1403255086/real 1403255086]  req@ffff880064c9e000 x1471410848666436/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0003@10.1.6.21@tcp:12/10 lens 400/544 e 0 to 1 dl 1403255111 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;There are MGS_CONNECT and MDS_CONNECT RPCs being issued again and again (not resend), it seems that the reconnect engine does not honor  that the obd device (mgc obd and lwp obd) has been cleaned and keeps trying to reconnect the device.&lt;/p&gt;</comment>
                            <comment id="91627" author="green" created="Thu, 14 Aug 2014 17:11:12 +0000"  >&lt;p&gt;I think the root issue comes from this message:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;02:17:17:Lustre: lustre-MDT0002 is waiting for obd_unlinked_exports more than 8 seconds. The obd refcount = 11. Is it stuck?
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Which basically means the obd has some extra references and it&apos;s those references that don&apos;t let it go away and so it tries to reconnect still?&lt;/p&gt;

&lt;p&gt;This sort of a message is what is seen in all of bugs of this class.&lt;/p&gt;</comment>
                            <comment id="91676" author="yujian" created="Thu, 14 Aug 2014 22:39:40 +0000"  >&lt;p&gt;One more instance on master branch: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/a0ca0910-23fe-11e4-84ee-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/a0ca0910-23fe-11e4-84ee-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="91704" author="bobijam" created="Fri, 15 Aug 2014 07:53:14 +0000"  >&lt;p&gt;Is it possible the result of the un-dying connect requests being kept re-generating and they hold the obd refcount?&lt;/p&gt;</comment>
                            <comment id="92246" author="yujian" created="Fri, 22 Aug 2014 17:57:48 +0000"  >&lt;blockquote&gt;&lt;p&gt;Hmm, this umount mgs failure only happens for &lt;a href=&quot;http://review.whamcloud.com/#/c/10249/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10249/&lt;/a&gt;, so I am not sure this is related with that patch or not. I will create a new ticket if I saw more failures.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;While testing patch &lt;a href=&quot;http://review.whamcloud.com/11539&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11539&lt;/a&gt; on Lustre b2_5 branch, sanity-lfsck test 0 hit the unmounting mgs failure. I created a new ticket &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5539&quot; title=&quot;MGS is waiting for obd_unlinked_exports more than 1024 seconds&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5539&quot;&gt;&lt;del&gt;LU-5539&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="92727" author="yujian" created="Thu, 28 Aug 2014 17:13:02 +0000"  >&lt;p&gt;While testing patch &lt;a href=&quot;http://review.whamcloud.com/11574&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11574&lt;/a&gt; on Lustre b2_5 branch with FSTYPE=zfs, sanity test 132 hit the same failure in this ticket.&lt;br/&gt;
Maloo report: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/271e44e8-2d5c-11e4-b550-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/271e44e8-2d5c-11e4-b550-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="93340" author="isaac" created="Fri, 5 Sep 2014 17:03:54 +0000"  >&lt;p&gt;I checked the Maloo report above and found a deadlock on the OSS very similar to the latest ones in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4950&quot; title=&quot;sanity-benchmark test fsx hung: txg_sync was stuck on OSS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4950&quot;&gt;&lt;del&gt;LU-4950&lt;/del&gt;&lt;/a&gt;:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;ZFS had completed writing new blocks in the syncing txg, and updated the new uberblocks, but got stuck somewhere in the middle of doing a WRITE_FLUSH_FUA, to make sure the uberblocks do hit persistent storage I guess.&lt;/li&gt;
	&lt;li&gt;So txg_sync couldn&apos;t complete, the syncing thread got stuck there.&lt;/li&gt;
	&lt;li&gt;As the transaction groups couldn&apos;t move forward, the ll_ost00_008 thread also got stuck in txg_wait_open().&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;I haven&apos;t figured out why ZFS got stuck doing WRITE_FLUSH_FUA, but there are two things to move forward at this point:&lt;br/&gt;
1. Set zfs_nocacheflush:&lt;br/&gt;
options zfs zfs_nocacheflush=1&lt;br/&gt;
This makes DKIOCFLUSHWRITECACHE a noop. Then the deadlock may move elsewhere or simply disappear.&lt;br/&gt;
2. Whether zfs_nocacheflush is set or not, once the timeout happens, it&apos;d be useful to gather &quot;zpool events -v&quot; outputs on the OSS, which&apos;d give more details on the state of the stuck zio.&lt;/p&gt;</comment>
                            <comment id="94019" author="bzzz" created="Mon, 15 Sep 2014 18:47:22 +0000"  >&lt;p&gt;Isaac, can you explain what signs point to WRITE_FLUSH_FUA, please?&lt;/p&gt;</comment>
                            <comment id="94122" author="isaac" created="Tue, 16 Sep 2014 02:57:37 +0000"  >&lt;p&gt;Alex, in the OSS stack dump:&lt;br/&gt;
z_ioctl_iss/0 S 0000000000000001     0 10013      2 0x00000080&lt;br/&gt;
Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa024aa46&amp;gt;&amp;#93;&lt;/span&gt; ? vdev_disk_io_start+0x156/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;The z_ioctl_iss thread handles ZIO_TYPE_IOCTL and the only command is DKIOCFLUSHWRITECACHE, which is handled by vdev_disk_io_flush(). I suspected that vdev_disk_io_flush() was blocking in: bio = bio_alloc(GFP_NOIO, 0) - which blocks until there&apos;s memory to return. Also, txg_sync thread was waiting spa_sync()-&amp;gt;zio_wait(probably the IOCTL zio); and ll_ost00_008 thread was blocking in txg_wait_open(). That&apos;s why I suspected the DKIOCFLUSHWRITECACHE ioctl was blocking and preventing the txgs from moving forward - WRITE_FLUSH_FUA bio was probably not submitted yet.&lt;/p&gt;</comment>
                            <comment id="94124" author="bzzz" created="Tue, 16 Sep 2014 03:29:23 +0000"  >&lt;p&gt;Isaac, have a look here - &lt;a href=&quot;https://testing.hpdd.intel.com/test_logs/c2a5b9e6-2d5c-11e4-b550-5254006e85c2/show_text&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_logs/c2a5b9e6-2d5c-11e4-b550-5254006e85c2/show_text&lt;/a&gt;&lt;br/&gt;
I didn&apos;t notice anything except these:&lt;/p&gt;

&lt;p&gt;Sep 12 19:57:07 shadow-11vm3 kernel: ll_ost00_001  D 0000000000000000     0  9264      2 0x00000080&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: ffff88007c5c7960 0000000000000046 000000007223d500 0000000000000001&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: ffff88006e41a1f0 0000000000000082 ffff88006fd1cae8 ffff8800736e5d80&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: ffff88007223dab8 ffff88007c5c7fd8 000000000000fbc8 ffff88007223dab8&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: Call Trace:&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109b1ee&amp;gt;&amp;#93;&lt;/span&gt; ? prepare_to_wait_exclusive+0x4e/0x80&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa014347d&amp;gt;&amp;#93;&lt;/span&gt; cv_wait_common+0xed/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109afa0&amp;gt;&amp;#93;&lt;/span&gt; ? autoremove_wake_function+0x0/0x40&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa01434e5&amp;gt;&amp;#93;&lt;/span&gt; __cv_wait+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0242f7b&amp;gt;&amp;#93;&lt;/span&gt; txg_wait_open+0x7b/0xa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa020a95e&amp;gt;&amp;#93;&lt;/span&gt; dmu_tx_wait+0x29e/0x2b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8152ad0e&amp;gt;&amp;#93;&lt;/span&gt; ? mutex_lock+0x1e/0x50&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa020aa01&amp;gt;&amp;#93;&lt;/span&gt; dmu_tx_assign+0x91/0x490 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0e8154d&amp;gt;&amp;#93;&lt;/span&gt; osd_trans_start+0xed/0x430 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f81fac&amp;gt;&amp;#93;&lt;/span&gt; ofd_trans_start+0x7c/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;ofd&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f835c0&amp;gt;&amp;#93;&lt;/span&gt; ofd_object_destroy+0x3b0/0x850 &lt;span class=&quot;error&quot;&gt;&amp;#91;ofd&amp;#93;&lt;/span&gt;&lt;/p&gt;


&lt;p&gt;Sep 12 19:57:07 shadow-11vm3 kernel: txg_quiesce   S 0000000000000000     0   307      2 0x00000080&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: ffff8800736e5d70 0000000000000046 000007989b5d2dfd 0000000000000000&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: 0000000000000000 0000000000000001 ffff8800736e5d50 0000000000000086&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: ffff88006fd1d058 ffff8800736e5fd8 000000000000fbc8 ffff88006fd1d058&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: Call Trace:&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109b1ee&amp;gt;&amp;#93;&lt;/span&gt; ? prepare_to_wait_exclusive+0x4e/0x80&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa014347d&amp;gt;&amp;#93;&lt;/span&gt; cv_wait_common+0xed/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109afa0&amp;gt;&amp;#93;&lt;/span&gt; ? autoremove_wake_function+0x0/0x40&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81058bd3&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up+0x53/0x70&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa01434c5&amp;gt;&amp;#93;&lt;/span&gt; __cv_wait_interruptible+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa02432fd&amp;gt;&amp;#93;&lt;/span&gt; txg_thread_wait+0x1d/0x40 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0243629&amp;gt;&amp;#93;&lt;/span&gt; txg_quiesce_thread+0x299/0x2a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;Sep 12 19:57:07 shadow-11vm3 kernel: txg_sync      D 0000000000000001     0   308      2 0x00000080&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: ffff8800736e7ba0 0000000000000046 00000000ffffffff 000019aa6a2a17f0&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: ffff8800736e7b10 ffff8800717ffc20 000000000038aee4 ffffffffabecd08f&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: ffff88006fd1c5f8 ffff8800736e7fd8 000000000000fbc8 ffff88006fd1c5f8&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: Call Trace:&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a6d31&amp;gt;&amp;#93;&lt;/span&gt; ? ktime_get_ts+0xb1/0xf0&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81529ea3&amp;gt;&amp;#93;&lt;/span&gt; io_schedule+0x73/0xc0&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa014341c&amp;gt;&amp;#93;&lt;/span&gt; cv_wait_common+0x8c/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109afa0&amp;gt;&amp;#93;&lt;/span&gt; ? autoremove_wake_function+0x0/0x40&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa01434a8&amp;gt;&amp;#93;&lt;/span&gt; __cv_wait_io+0x18/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa02890ab&amp;gt;&amp;#93;&lt;/span&gt; zio_wait+0xfb/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa021ebe3&amp;gt;&amp;#93;&lt;/span&gt; dsl_pool_sync+0x2b3/0x3f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0236e4b&amp;gt;&amp;#93;&lt;/span&gt; spa_sync+0x40b/0xa60 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 12 19:57:07 shadow-11vm3 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0243916&amp;gt;&amp;#93;&lt;/span&gt; txg_sync_thread+0x2e6/0x510 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;/p&gt;</comment>
                            <comment id="94212" author="isaac" created="Wed, 17 Sep 2014 04:52:18 +0000"  >&lt;p&gt;My previous analysis might be inaccurate as I neglected the &apos;?&apos; in the stack entry:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;z_ioctl_iss/0 S 0000000000000001 0 10013 2 0x00000080
Call Trace:
[&amp;lt;ffffffffa024aa46&amp;gt;] ? vdev_disk_io_start+0x156/0x1b0 [zfs]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Which meant that the vdev_disk_io_start() function likely returned and the WRITE_FLUSH_FUA bio was already submitted - the z_ioctl_iss thread was waiting for more work to do. I checked the OSS stack dumps, and the z_* threads were all idle, so there was no more IO left to do. These led me to believe that:&lt;br/&gt;
1. IO for syncing the txg was all done, and WRITE_FLUSH_FUA was submitted as an IO barrier, i.e. to make sure all previous writes do hit persistent storage.&lt;br/&gt;
2. But the WRITE_FLUSH_FUA somehow didn&apos;t complete, causing txg_sync to wait and hang.&lt;/p&gt;

&lt;p&gt;Suggestions to troubleshoot stay the same: on the OSS, set zfs_nocacheflush, and gather &quot;zpool events -v&quot; outputs.&lt;/p&gt;</comment>
                            <comment id="94214" author="bzzz" created="Wed, 17 Sep 2014 05:46:03 +0000"  >&lt;p&gt;why do you think vdev_disk_io_start() was already used in this txg? that could be a leftover from the previous txg?&lt;/p&gt;</comment>
                            <comment id="94289" author="isaac" created="Wed, 17 Sep 2014 17:59:28 +0000"  >&lt;p&gt;Alex, it was only a guess. If we had &quot;zpool events -v&quot; outputs, that&apos;d provide a lot more information on the stuck zio.&lt;/p&gt;

&lt;p&gt;Also I had a hunch that the stuck zio had something to do with the way the zpool was configured. There seemed to be too many levels of indirection - the use of /dev/lvm-Role_OSS/ indicated LVM which seemed not necessary as zfs could manage its own volumes.&lt;/p&gt;</comment>
                            <comment id="94355" author="adilger" created="Thu, 18 Sep 2014 08:47:44 +0000"  >&lt;p&gt;Seems this is the major cause of review-zfs test failures.&lt;/p&gt;</comment>
                            <comment id="94693" author="pjones" created="Tue, 23 Sep 2014 04:04:13 +0000"  >&lt;p&gt;Isaac&lt;/p&gt;

&lt;p&gt;This issue continues to disrupt zfs testing. What do you advise?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="94845" author="isaac" created="Wed, 24 Sep 2014 16:13:08 +0000"  >&lt;p&gt;Peter,&lt;/p&gt;

&lt;p&gt;The auto-test results unfortunately didn&apos;t include sufficient ZFS debug information, see &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5674&quot; title=&quot;Maloo test report should include zfs debugging data when when FSTYPE=zfs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5674&quot;&gt;&lt;del&gt;TEI-1729&lt;/del&gt;&lt;/a&gt;. I&apos;m working with Jian to manually start a test session and reproduce the failure so I&apos;d be able to collect the debug information needed.&lt;/p&gt;</comment>
                            <comment id="95105" author="isaac" created="Fri, 26 Sep 2014 23:55:18 +0000"  >&lt;p&gt;It seemed like LVM+deadline scheduler could be the cause of IO timeouts, see &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4950&quot; title=&quot;sanity-benchmark test fsx hung: txg_sync was stuck on OSS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4950&quot;&gt;&lt;del&gt;LU-4950&lt;/del&gt;&lt;/a&gt; for latest results.&lt;/p&gt;</comment>
                            <comment id="95572" author="yujian" created="Thu, 2 Oct 2014 19:47:13 +0000"  >&lt;p&gt;While verifying &lt;a href=&quot;http://review.whamcloud.com/12001&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/12001&lt;/a&gt; on Lustre b2_5 branch with FSTYPE=zfs, sanity test 900 hung at unmounting MDS.&lt;/p&gt;

&lt;p&gt;Console log on MDS:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;06:27:21:Lustre: DEBUG MARKER: umount -d -f /mnt/mds1
06:27:21:Lustre: lustre-MDT0000: Not available for connect from 10.1.4.26@tcp (stopping)
06:27:21:LustreError: 3264:0:(client.c:1079:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff880036038800 x1480755389930956/t0(0) o13-&amp;gt;lustre-OST0000-osc-MDT0000@10.1.4.26@tcp:7/4 lens 224/368 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
06:27:21:LustreError: 3264:0:(client.c:1079:ptlrpc_import_delay_req()) Skipped 3 previous similar messages
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) header@ffff88005bfda240[0x0, 1, [0x1:0x0:0x0] hash exist]{
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....local_storage@ffff88005bfda298
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....osd-zfs@ffff880059957ac0osd-zfs-object@ffff880059957ac0
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) } header@ffff88005bfda240
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) header@ffff88006891de08[0x0, 0, [0x100020000:0x1d03:0x0] hash lru]{
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....osp@ffff88006891de60osp-object@ffff88006891de08
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) } header@ffff88006891de08
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) header@ffff88005bfda9c0[0x0, 1, [0x200000003:0x0:0x0] hash exist]{
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....local_storage@ffff88005bfdaa18
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....osd-zfs@ffff880059957ee0osd-zfs-object@ffff880059957ee0
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) } header@ffff88005bfda9c0
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) header@ffff88005bfda3c0[0x1, 1, [0x200000003:0x2:0x0] hash exist]{
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....local_storage@ffff88005bfda418
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....osd-zfs@ffff880059957cd0osd-zfs-object@ffff880059957cd0
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) } header@ffff88005bfda3c0
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) header@ffff88005bfda300[0x0, 1, [0x200000003:0x3:0x0] hash exist]{
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....local_storage@ffff88005bfda358
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....osd-zfs@ffff880059957bc8osd-zfs-object@ffff880059957bc8
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) } header@ffff88005bfda300
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) header@ffff880027ebfc80[0x0, 1, [0xa:0x0:0x0] hash exist]{
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....local_storage@ffff880027ebfcd8
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....osd-zfs@ffff8800599579b8osd-zfs-object@ffff8800599579b8
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) } header@ffff880027ebfc80
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) header@ffff88006891ded0[0x0, 0, [0x100000000:0x1e0e:0x0] hash lru]{
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) ....osp@ffff88006891df28osp-object@ffff88006891ded0
06:27:21:LustreError: 3249:0:(osp_dev.c:777:osp_device_free()) } header@ffff88006891ded0
06:27:21:LustreError: 3249:0:(lu_object.c:1252:lu_device_fini()) ASSERTION( cfs_atomic_read(&amp;amp;d-&amp;gt;ld_ref) == 0 ) failed: Refcount is 1
06:27:21:LustreError: 3249:0:(lu_object.c:1252:lu_device_fini()) LBUG
06:27:21:Pid: 3249, comm: obd_zombid
06:27:22:
06:27:22:Call Trace:
06:27:22: [&amp;lt;ffffffffa05ef895&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
06:27:22: [&amp;lt;ffffffffa05efe97&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
06:27:22: [&amp;lt;ffffffffa075be88&amp;gt;] lu_device_fini+0xb8/0xc0 [obdclass]
06:27:22: [&amp;lt;ffffffffa076287e&amp;gt;] dt_device_fini+0xe/0x10 [obdclass]
06:27:22: [&amp;lt;ffffffffa104e93f&amp;gt;] osp_device_free+0xff/0x220 [osp]
06:27:22: [&amp;lt;ffffffffa0746e5d&amp;gt;] class_decref+0x46d/0x550 [obdclass]
06:27:22: [&amp;lt;ffffffffa07257df&amp;gt;] obd_zombie_impexp_cull+0x30f/0x5d0 [obdclass]
06:27:22: [&amp;lt;ffffffffa0725b05&amp;gt;] obd_zombie_impexp_thread+0x65/0x190 [obdclass]
06:27:22: [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
06:27:22: [&amp;lt;ffffffffa0725aa0&amp;gt;] ? obd_zombie_impexp_thread+0x0/0x190 [obdclass]
06:27:22: [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
06:27:22: [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
06:27:22: [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
06:27:22: [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
06:27:22:
06:27:22:Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo report: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/6f56a29c-4a07-11e4-95b1-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/6f56a29c-4a07-11e4-95b1-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="95589" author="isaac" created="Thu, 2 Oct 2014 22:20:35 +0000"  >&lt;p&gt;The failure above was different from the zio timeout here in this ticket. The OSS zpools were idle, and on the MDS the zpool seemed to have been exported already (because the txg_sync thread already quit). It might have something to do with the patch being tested, but I want to focus on the zio timeout issue here.&lt;/p&gt;</comment>
                            <comment id="95595" author="yujian" created="Thu, 2 Oct 2014 22:59:28 +0000"  >&lt;p&gt;Hi Isaac,&lt;/p&gt;

&lt;p&gt;According to problem 2) in the earlier comments &lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-5242?focusedCommentId=87897&amp;amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-87897&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-5242?focusedCommentId=87897&amp;amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-87897&lt;/a&gt; in this ticket, I reported the above failure here.&lt;/p&gt;

&lt;p&gt;I also saw the failure was reported in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3623&quot; title=&quot;lu_device_fini()) ASSERTION( cfs_atomic_read(&amp;amp;d-&amp;gt;ld_ref) == 0 ) failed: Refcount is 1 from osp_device_free&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3623&quot;&gt;&lt;del&gt;LU-3623&lt;/del&gt;&lt;/a&gt;. So, let me track the failure over there then.&lt;/p&gt;</comment>
                            <comment id="95814" author="yujian" created="Tue, 7 Oct 2014 16:24:20 +0000"  >&lt;p&gt;One more instance on master branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/b2697762-4e12-11e4-9581-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/b2697762-4e12-11e4-9581-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="96457" author="doug" created="Wed, 15 Oct 2014 23:10:28 +0000"  >&lt;p&gt;Same failure has occurred on master branch for test_23a: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/90cd3a7e-53d7-11e4-9c8a-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/90cd3a7e-53d7-11e4-9c8a-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="97109" author="hdoreau" created="Thu, 23 Oct 2014 09:46:25 +0000"  >&lt;p&gt;Looks like another instance, on master: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/a40e6a14-5a2f-11e4-8dbb-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/a40e6a14-5a2f-11e4-8dbb-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="97507" author="yujian" created="Sat, 25 Oct 2014 18:03:42 +0000"  >&lt;p&gt;One more instance on master branch: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/b8f4813c-5c25-11e4-b9ce-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/b8f4813c-5c25-11e4-b9ce-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="97883" author="isaac" created="Wed, 29 Oct 2014 21:45:32 +0000"  >&lt;p&gt;I was able to reproduce it today and kept the system live for debugging. The txg_sync thread stuck in zio_wait() but the weird thing was there was no IO pending/running at either the ZFS vdev queue or the Linux block dev queue. At this point I tend to think it&apos;s a ZFS issue rather than Lustre issue.&lt;/p&gt;</comment>
                            <comment id="97890" author="yujian" created="Wed, 29 Oct 2014 22:58:36 +0000"  >&lt;p&gt;One more instance on Lustre b2_5 branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/91627324-5fa7-11e4-895a-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/91627324-5fa7-11e4-895a-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="97909" author="isaac" created="Thu, 30 Oct 2014 04:07:07 +0000"  >&lt;p&gt;Likely it can be fixed by this ZFS patch:&lt;br/&gt;
&lt;a href=&quot;https://github.com/zfsonlinux/zfs/pull/2828&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/zfsonlinux/zfs/pull/2828&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;I&apos;ll test it soon.&lt;/p&gt;</comment>
                            <comment id="98749" author="dmiter" created="Mon, 10 Nov 2014 08:56:14 +0000"  >&lt;p&gt;One more failure on master: &lt;a href=&quot;https://testing.hpdd.intel.com/sub_tests/bee79924-6861-11e4-a1b6-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/sub_tests/bee79924-6861-11e4-a1b6-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="99058" author="adilger" created="Thu, 13 Nov 2014 18:34:31 +0000"  >&lt;p&gt;Isaac, any update on testing this patch? While we prefer to stick with the upstream releases, we are also able to land a patch into our own ZFS Git repo so that our own testing passes.  If this fixes the problem we are seeing, it is also worthwhile to update the issue on GitHub with this information so that it will speed up the patch landing into a release.&lt;/p&gt;</comment>
                            <comment id="99534" author="isaac" created="Wed, 19 Nov 2014 00:46:23 +0000"  >&lt;p&gt;I&apos;m still troubleshooting some local ZFS test failures after merging the patch. I&apos;ll apply it to Jenkins ZFS build once all tests pass.&lt;/p&gt;</comment>
                            <comment id="100581" author="yujian" created="Wed, 3 Dec 2014 18:28:54 +0000"  >&lt;p&gt;More instance on Lustre b2_5 branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/e3f3287e-7b0c-11e4-8c6d-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/e3f3287e-7b0c-11e4-8c6d-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="100698" author="isaac" created="Thu, 4 Dec 2014 17:27:17 +0000"  >&lt;p&gt;I&apos;ve been testing with a patched ZFS build and so far haven&apos;t been able to reproduce it. But the strange thing is, I can&apos;t reproduce it either with a previous build where I could easily reproduce it with sanity test_132. So now I&apos;m not sure whether the patch fixed it, or some other change made it much harder to reproduce. I&apos;m now looking into why I can&apos;t reproduce it any more with the same build/test hw where I was able to easily reproduce it.&lt;/p&gt;</comment>
                            <comment id="101001" author="adilger" created="Mon, 8 Dec 2014 20:04:32 +0000"  >&lt;p&gt;Is it possible to push the upstream GitHub patch into our local ZFS git repository, so that we are always testing with that patch?  That would allow us to get better testing, to see if it fixes this problem, and to see if it introduces some other problems or not.  This is one of the last issues that is preventing review-zfs to be enforced for all patches.&lt;/p&gt;</comment>
                            <comment id="101041" author="bzzz" created="Tue, 9 Dec 2014 04:43:52 +0000"  >&lt;p&gt;here is an example how to add a patch: &lt;a href=&quot;http://git.whamcloud.com/ff/daos_lustre.git/commit/21f403380e365b083cf4e48003e863a1661a5a60&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://git.whamcloud.com/ff/daos_lustre.git/commit/21f403380e365b083cf4e48003e863a1661a5a60&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="101443" author="gerrit" created="Fri, 12 Dec 2014 13:46:24 +0000"  >&lt;p&gt;Nathaniel Clark (nathaniel.l.clark@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13050&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13050&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; build: Add zfs patch for Illumos 5244&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e5b4e4305c9cdf7375ad5542a7118f3deb4cf7e4&lt;/p&gt;</comment>
                            <comment id="101748" author="gerrit" created="Tue, 16 Dec 2014 19:56:37 +0000"  >&lt;p&gt;Andreas Dilger (andreas.dilger@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13093&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13093&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; tests: except sanity test_132 for ZFS&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 5c867565cdc8d4b1da41ef17e9bc821b63809151&lt;/p&gt;</comment>
                            <comment id="101750" author="adilger" created="Tue, 16 Dec 2014 19:58:51 +0000"  >&lt;p&gt;Based on the test results in &lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/f8d48678-84d7-11e4-985f-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/f8d48678-84d7-11e4-985f-5254006e85c2&lt;/a&gt; it appears that the 13050 patch does not resolve the problem being seen here.  Is there anything new that can be learned from the logs in these failed tests?&lt;/p&gt;

&lt;p&gt;This problem and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4536&quot; title=&quot;sanity test_65ic&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4536&quot;&gt;&lt;del&gt;LU-4536&lt;/del&gt;&lt;/a&gt; are really blocking the ability to enable review-zfs in enforcing mode.  It is passing a large fraction of tests, but these ones are causing the most failures.  Normally I&apos;d say this test should be disabled, but I guess the hang would happen again at some later unmount, though I don&apos;t see that?  Is there something special that this test does with the backing storage that is causing ZFS grief, or is it possible there is a bug in the SOM codepath that is causing a reference leak and hanging the unmount?&lt;/p&gt;

&lt;p&gt;For now, I&apos;ve pushed a patch to disable this test, because SOM is not a supported feature, and it will continue to be tested with ldiskfs anyway.&lt;/p&gt;</comment>
                            <comment id="101789" author="gerrit" created="Wed, 17 Dec 2014 06:15:01 +0000"  >&lt;p&gt;Andreas Dilger (andreas.dilger@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/13093/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13093/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; tests: except sanity test_132 for ZFS&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 12bfc80b08d2b70c0052b271bfa2d34995b5892e&lt;/p&gt;</comment>
                            <comment id="101896" author="adilger" created="Thu, 18 Dec 2014 00:27:32 +0000"  >&lt;p&gt;Sadly, it seems now that test_132() was added to the ALWAYS_EXCEPT list, test_133() has now started causing timeouts at OST unmount time:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/7e564d44-85dd-11e4-b909-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/7e564d44-85dd-11e4-b909-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;This makes me wonder if there is something happening in an earlier test that is causing a problem (e.g. refcount leak) and any later unmount will hit the problem?  If we could hit this reliably in a shorter test (e.g. just sanity in a loop, even if it only hit 1/5 times), then we might be able to bisect which subtest is causing the problem to isolate the root cause.&lt;/p&gt;</comment>
                            <comment id="101918" author="bzzz" created="Thu, 18 Dec 2014 04:30:45 +0000"  >&lt;p&gt;we could change t-f to remount OST after every sub-test (or randomly) ?&lt;/p&gt;</comment>
                            <comment id="101923" author="adilger" created="Thu, 18 Dec 2014 06:15:54 +0000"  >&lt;p&gt;There used to be the ability to do this in sanity.sh, by defining the &quot;setup&quot; and &quot;cleanup&quot; macros, but I suspect that is long gone.  &lt;/p&gt;</comment>
                            <comment id="101987" author="gerrit" created="Thu, 18 Dec 2014 19:16:17 +0000"  >&lt;p&gt;Nathaniel Clark (nathaniel.l.clark@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13130&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13130&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; tests: DEBUG ONLY unmount/remount every test&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 48c4b3ea65ae7cf69c84df61c3b76f15bb336f5f&lt;/p&gt;</comment>
                            <comment id="102302" author="yong.fan" created="Wed, 24 Dec 2014 23:56:37 +0000"  >&lt;p&gt;Another failure instance:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/c687c060-8bb5-11e4-8220-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/c687c060-8bb5-11e4-8220-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="102450" author="green" created="Wed, 31 Dec 2014 18:11:36 +0000"  >&lt;p&gt;If diagnosing this problem requires some extra data that autotest framework does not collect - a patch needs to be created to collect this data if this is something not easily reproducible on other systems.&lt;/p&gt;</comment>
                            <comment id="102692" author="adilger" created="Tue, 6 Jan 2015 21:39:09 +0000"  >&lt;p&gt;Looking into the OST syslog from one of the recent &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; failures, I see that sanity test_133g is hanging at OST unmount because of increased refcounts on the export structure, and again this can be traced back to OST threads blocked on starting a ZFS TXG.&lt;/p&gt;

&lt;p&gt;Previous suggestions in this bug and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4716&quot; title=&quot;replay-ost-single test_5: stuck in dbuf_read-&amp;gt;zio_wait&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4716&quot;&gt;&lt;del&gt;LU-4716&lt;/del&gt;&lt;/a&gt; included:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;1. Set zfs_nocacheflush:&lt;br/&gt;
options zfs zfs_nocacheflush=1&lt;br/&gt;
This makes DKIOCFLUSHWRITECACHE a noop. Then the deadlock may move elsewhere or simply disappear.&lt;br/&gt;
2. Whether zfs_nocacheflush is set or not, once the timeout happens, it&apos;d be useful to gather &quot;zpool events -v&quot; outputs on the OSS, which&apos;d give more details on the state of the stuck zio.&lt;/p&gt;&lt;/blockquote&gt;
&lt;blockquote&gt;
&lt;p&gt;I think it makes sense to make sure that our test system:&lt;br/&gt;
1. Use whole disks for zfs pools on guest VMs.&lt;br/&gt;
2. Use noop IO scheduler for corresponding disks on host OS.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Can any of these options be tested/implemented in some reasonable manner?&lt;br/&gt;
Minh, is there some way to check what the IO scheduler is for the VM host system?&lt;/p&gt;

&lt;p&gt;I&apos;d like to get some forward progress on these issues.&lt;/p&gt;</comment>
                            <comment id="102725" author="adilger" created="Wed, 7 Jan 2015 08:20:56 +0000"  >&lt;p&gt;I&apos;ve noticed in the test_133g hangs that there is a watchdog timeout in test_116a that indicates the source of the deadlock is at that point or earlier (this is easily seen in the OST dmesg log, but doesn&apos;t have any timestamps):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;== sanity test 116a: stripe QOS: free space balance ===================== 18:41:05 (1420224065)
INFO: task ll_ost00_017:10306 blocked for more than 120 seconds.
ll_ost00_017  D 0000000000000001     0 10306      2 0x00000080
Call Trace:
 [&amp;lt;ffffffff8152bd65&amp;gt;] rwsem_down_failed_common+0x95/0x1d0
 [&amp;lt;ffffffff8152bec3&amp;gt;] rwsem_down_write_failed+0x23/0x30
 [&amp;lt;ffffffff8128fbe3&amp;gt;] call_rwsem_down_write_failed+0x13/0x20
 [&amp;lt;ffffffff8152b3c2&amp;gt;] ? down_write+0x32/0x40
 [&amp;lt;ffffffffa0e79d81&amp;gt;] osd_object_write_lock+0x61/0x70 [osd_zfs]
 [&amp;lt;ffffffffa0fbf076&amp;gt;] ofd_object_destroy+0x66/0x8e0 [ofd]
 [&amp;lt;ffffffffa0fb964d&amp;gt;] ofd_destroy_by_fid+0x35d/0x620 [ofd]
 [&amp;lt;ffffffffa0fb2f6a&amp;gt;] ofd_destroy_hdl+0x2fa/0xb60 [ofd]
 [&amp;lt;ffffffffa09df97e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
 [&amp;lt;ffffffffa098f711&amp;gt;] ptlrpc_main+0xe41/0x1960 [ptlrpc]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Unfortunately the timestamps on the console logs are not very accurate (they appear to only update once a minute or so), and this is a long-running test (about 200s), so test_116a may in fact be the source of the problem itself.&lt;/p&gt;

&lt;p&gt;I looked at all 20 of the test_133g hang OST dmesg logs from the past two weeks, and in every case the first test that is reporting this watchdog is test_116a, which itself takes about 200s to run.  That makes it very likely that test_116a (or a test run just before it) is the source of the problem, since the previous tests can vary by a few seconds, so if it were a long time before the start of test_116a in some cases the watchdog message would have appeared during an earlier test.&lt;/p&gt;</comment>
                            <comment id="102729" author="gerrit" created="Wed, 7 Jan 2015 10:14:01 +0000"  >&lt;p&gt;Andreas Dilger (andreas.dilger@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13264&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13264&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; tests: skip sanity test_116a for ZFS&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 691009c5c0e38317724cdac764080fa45ad96667&lt;/p&gt;</comment>
                            <comment id="103054" author="isaac" created="Fri, 9 Jan 2015 19:34:04 +0000"  >&lt;p&gt;I just managed to reproduce it and the symptoms matched ZoL bug 2523, which was recently fixed by SPL patch:&lt;br/&gt;
&lt;a href=&quot;https://github.com/zfsonlinux/spl/commit/a3c1eb77721a0d511b4fe7111bb2314686570c4b&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/zfsonlinux/spl/commit/a3c1eb77721a0d511b4fe7111bb2314686570c4b&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Nathaniel is working on updating our ZFS/SPL version to 0.6.3-1.2 in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6105&quot; title=&quot;Update ZFS/SPL version to 0.6.3-1.2&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6105&quot;&gt;&lt;del&gt;LU-6105&lt;/del&gt;&lt;/a&gt;, which includes the fix above.&lt;/p&gt;</comment>
                            <comment id="103471" author="adilger" created="Wed, 14 Jan 2015 15:14:25 +0000"  >&lt;p&gt;Unfortunately, the upgrade to ZFS 0.6.3-1.2 doesn&apos;t seem to have solved this problem:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/19c8b610-9bc1-11e4-857a-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/19c8b610-9bc1-11e4-857a-5254006e85c2&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/deca9712-9bc1-11e4-857a-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/deca9712-9bc1-11e4-857a-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;It might be that there are multiple issues at work here?  I haven&apos;t checked the latest failures to see if they have different symptoms or not.&lt;/p&gt;</comment>
                            <comment id="103517" author="isaac" created="Wed, 14 Jan 2015 20:00:26 +0000"  >&lt;p&gt;The failures looked similar - txg_sync thread stuck in zio_wait(). Either there were multiple causes of this same symptom or the upstream fix didn&apos;t completely close the race. I&apos;m looking into that. Meanwhile, I think we&apos;d go ahead and land it and see if it&apos;d reduce its occurrences, since quite some ZoL users reported that the fix did seem to work for them.&lt;/p&gt;</comment>
                            <comment id="103589" author="gerrit" created="Thu, 15 Jan 2015 14:00:31 +0000"  >&lt;p&gt;Nathaniel Clark (nathaniel.l.clark@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13416&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13416&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; tests: DEBUG ONLY obd exports after every test&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: b19ed84cad9fe49f79bfc1096ec15141f70d16f0&lt;/p&gt;</comment>
                            <comment id="103687" author="isaac" created="Thu, 15 Jan 2015 22:32:53 +0000"  >&lt;p&gt;I just had a close look at:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/deca9712-9bc1-11e4-857a-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/deca9712-9bc1-11e4-857a-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Although the txg_sync thread was stuck in D state in zio_wait(), it probably wasn&apos;t the cause of the test hang. Here&apos;s what happened:&lt;br/&gt;
1. At the start of sanity test 116a, 20:16:13, both ll_ost00_013 and ll_ost00_015 were blocked in D state in osd_object_write_lock().&lt;br/&gt;
2. And they kept blocked there and never recovered. These two threads would probably hold some references that prevented the OST from being umounted.&lt;/p&gt;

&lt;p&gt;It was unlikely that the two threads hanged because of txg_sync hang in zio_wait, as we&apos;ve seen previously, because if so:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;There&apos;d have been similar messages like the txg_sync thread blocked for more than 120 seconds, but there was none.&lt;/li&gt;
	&lt;li&gt;With the txg_sync thread blocked, no write to the ost could complete, in fact no transaction would be open. The OST would simply get stuck there. However, the tests went on OK until sanity test 133g at 20:27.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;So I&apos;d tend to say that the test hang was a result of the hanged ll_ost threads, rather than the txg_sync zio_wait deadlock.&lt;/p&gt;

&lt;p&gt;The txg_sync thread did show as D state in zio_wait at the stack dump after sanity test 133g wouldn&apos;t complete. But very likely it just happened to be there waiting for some IOs to complete on the slow VM disks. It&apos;s quite often to see the txg_sync thread blocked for IO completion for more than 20 seconds on the test VMs. On the same OSS I saw ll_ost00_011 blocked for 40 seconds in txg_wait_open() at 20:09:38 and then recovered - so the txg_sync thread must have been blocked for IO roughly 40 seconds and moved on. That&apos;s long enough a window for the txg_sync thread to just happen to be in D state in zio_wait() when the stacks are dumped.&lt;/p&gt;

&lt;p&gt;So, I&apos;d still suggest to merge the patch to upgrade to ZFS 0.6.3-1.2. So far there&apos;s been no new report of zio_wait hang on ZoL since the SPL fix was merged.&lt;/p&gt;</comment>
                            <comment id="103700" author="adilger" created="Fri, 16 Jan 2015 01:46:03 +0000"  >&lt;p&gt;I tried commenting out test_116a in &lt;a href=&quot;http://review.whamcloud.com/13264&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13264&lt;/a&gt;, but that also hit stuck threads with the same stack trace in test_120g (twice, and that test runs for about 55s), and in test_120b (once, and it only runs for 1s - &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/6903d29a-983e-11e4-8f7b-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/6903d29a-983e-11e4-8f7b-5254006e85c2&lt;/a&gt;).  Using the test_120b stack dump time, it puts the hang back in test_103c or earlier.  Using the test_120g stack dump time (start/end window) it puts the failure in test_118c-test_118k or earlier, so there is no overlap.  It &lt;em&gt;seems&lt;/em&gt; like there is a specific test or behaviour that is triggering this, since the stacks are always dumped in test_116a (except when I had excluded it).  If it was just a random race condition, it would appear in other tests as well.&lt;/p&gt;</comment>
                            <comment id="103709" author="isaac" created="Fri, 16 Jan 2015 03:16:47 +0000"  >&lt;p&gt;I just noticed that the ORI-616 fix was missing on master. I&apos;ve asked for confirmation whether it&apos;d be needed still. If yes, then that might explain some of the symptoms here as a refcount was missing.&lt;/p&gt;</comment>
                            <comment id="103726" author="gerrit" created="Fri, 16 Jan 2015 04:16:27 +0000"  >&lt;p&gt;Isaac Huang (he.huang@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13431&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13431&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; osd-zfs: verify ref counting&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 22dc2ec61c815d532c507c68e871f4aa0471c423&lt;/p&gt;</comment>
                            <comment id="103742" author="adilger" created="Fri, 16 Jan 2015 14:07:52 +0000"  >&lt;p&gt;Isaac, given how often this bug is being hit (about 50% of all review-zfs test runs), it is worthwhile to just try porting the patch to master and running a bunch of sanity tests via &lt;tt&gt;Test-Parameters:&lt;/tt&gt; to see if it passes or fails.&lt;/p&gt;</comment>
                            <comment id="103894" author="adilger" created="Mon, 19 Jan 2015 19:00:35 +0000"  >&lt;p&gt;Unfortunately, I don&apos;t think that the ORI-616 patch is relevant.  It looks like the ORI-616 patch &lt;a href=&quot;http://review.whamcloud.com/2560&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/2560&lt;/a&gt; that adds the lu_object_get() call in osd_bufs_get() is later dropped by ORI-645 patch &lt;a href=&quot;http://review.whamcloud.com/2607&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/2607&lt;/a&gt; (presumably intentionally).&lt;/p&gt;</comment>
                            <comment id="104255" author="utopiabound" created="Wed, 21 Jan 2015 19:53:21 +0000"  >&lt;p&gt;I&apos;ve just hit this on my local VM setup and txg_sync is what seems to be holding up the umount of the OST.  Attached is a dump of `zpool events -v` from the OSS.&lt;/p&gt;

&lt;p&gt;Below is a dump of blocked processes I created using sysrq:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;SysRq : Show Blocked State
  task                        PC stack   pid father
ll_ost01_004  D 0000000000000001     0  9350      2 0x00000080
 ffff8800052e5960 0000000000000046 0000000000016880 0000000000000001
 ffffc90008afb350 0000000000000082 ffff880024e73588 ffff880001d1fd80
 ffff880005337098 ffff8800052e5fd8 000000000000fbc8 ffff880005337098
Call Trace:
 [&amp;lt;ffffffff8109b1ee&amp;gt;] ? prepare_to_wait_exclusive+0x4e/0x80
 [&amp;lt;ffffffffa02d847d&amp;gt;] cv_wait_common+0xed/0x100 [spl]
 [&amp;lt;ffffffff8109afa0&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa02d84e5&amp;gt;] __cv_wait+0x15/0x20 [spl]
 [&amp;lt;ffffffffa03d7f7b&amp;gt;] txg_wait_open+0x7b/0xa0 [zfs]
 [&amp;lt;ffffffffa039f95e&amp;gt;] dmu_tx_wait+0x29e/0x2b0 [zfs]
 [&amp;lt;ffffffff8152a29e&amp;gt;] ? mutex_lock+0x1e/0x50
 [&amp;lt;ffffffffa039fa01&amp;gt;] dmu_tx_assign+0x91/0x490 [zfs]
 [&amp;lt;ffffffffa070f5ad&amp;gt;] osd_trans_start+0xed/0x430 [osd_zfs]
 [&amp;lt;ffffffffa0d9504c&amp;gt;] ofd_trans_start+0x7c/0x100 [ofd]
 [&amp;lt;ffffffffa0d97500&amp;gt;] ofd_object_destroy+0x3d0/0x8e0 [ofd]
 [&amp;lt;ffffffffa0d9176d&amp;gt;] ofd_destroy_by_fid+0x35d/0x620 [ofd]
 [&amp;lt;ffffffffa07a4ee0&amp;gt;] ? ldlm_blocking_ast+0x0/0x180 [ptlrpc]
 [&amp;lt;ffffffffa07a6780&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
 [&amp;lt;ffffffffa0d8b0ba&amp;gt;] ofd_destroy_hdl+0x2fa/0xb60 [ofd]
 [&amp;lt;ffffffffa083396e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
 [&amp;lt;ffffffffa07e3701&amp;gt;] ptlrpc_main+0xe41/0x1960 [ptlrpc]
 [&amp;lt;ffffffffa07e28c0&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
ll_ost01_014  D 0000000000000001     0 23470      2 0x00000080
 ffff88000e1e9a70 0000000000000046 ffff8800186e4090 ffff88000e1e9a50
 ffffffffa08617d3 0000000000000000 ffff88000e1e9fd8 ffff8800056d8040
 ffff8800056d85f8 ffff88000e1e9fd8 000000000000fbc8 ffff8800056d85f8
Call Trace:
 [&amp;lt;ffffffff8152b315&amp;gt;] rwsem_down_failed_common+0x95/0x1d0
 [&amp;lt;ffffffff8152b473&amp;gt;] rwsem_down_write_failed+0x23/0x30
 [&amp;lt;ffffffff8128f683&amp;gt;] call_rwsem_down_write_failed+0x13/0x20
 [&amp;lt;ffffffff8152a972&amp;gt;] ? down_write+0x32/0x40
 [&amp;lt;ffffffffa07171a1&amp;gt;] osd_object_write_lock+0x61/0x70 [osd_zfs]
 [&amp;lt;ffffffffa0d97196&amp;gt;] ofd_object_destroy+0x66/0x8e0 [ofd]
 [&amp;lt;ffffffffa0d9176d&amp;gt;] ofd_destroy_by_fid+0x35d/0x620 [ofd]
 [&amp;lt;ffffffffa07a4ee0&amp;gt;] ? ldlm_blocking_ast+0x0/0x180 [ptlrpc]
 [&amp;lt;ffffffffa07a6780&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
 [&amp;lt;ffffffffa0d8b0ba&amp;gt;] ofd_destroy_hdl+0x2fa/0xb60 [ofd]
 [&amp;lt;ffffffffa083396e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
 [&amp;lt;ffffffffa07e3701&amp;gt;] ptlrpc_main+0xe41/0x1960 [ptlrpc]
 [&amp;lt;ffffffffa07e28c0&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
ll_ost01_016  D 0000000000000001     0 27741      2 0x00000080
 ffff88000d2c7a70 0000000000000046 ffff8800186e4090 ffff88000d2c7a50
 ffffffffa08617d3 0000000000000000 ffff88000d2c7fd8 ffff8800054f6080
 ffff8800054f6638 ffff88000d2c7fd8 000000000000fbc8 ffff8800054f6638
Call Trace:
 [&amp;lt;ffffffff8152b315&amp;gt;] rwsem_down_failed_common+0x95/0x1d0
 [&amp;lt;ffffffff8152b473&amp;gt;] rwsem_down_write_failed+0x23/0x30
 [&amp;lt;ffffffff8128f683&amp;gt;] call_rwsem_down_write_failed+0x13/0x20
 [&amp;lt;ffffffff8152a972&amp;gt;] ? down_write+0x32/0x40
 [&amp;lt;ffffffffa07171a1&amp;gt;] osd_object_write_lock+0x61/0x70 [osd_zfs]
 [&amp;lt;ffffffffa0d97196&amp;gt;] ofd_object_destroy+0x66/0x8e0 [ofd]
 [&amp;lt;ffffffffa0d9176d&amp;gt;] ofd_destroy_by_fid+0x35d/0x620 [ofd]
 [&amp;lt;ffffffffa07a4ee0&amp;gt;] ? ldlm_blocking_ast+0x0/0x180 [ptlrpc]
 [&amp;lt;ffffffffa07a6780&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
 [&amp;lt;ffffffffa0d8b0ba&amp;gt;] ofd_destroy_hdl+0x2fa/0xb60 [ofd]
 [&amp;lt;ffffffffa083396e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
 [&amp;lt;ffffffffa07e3701&amp;gt;] ptlrpc_main+0xe41/0x1960 [ptlrpc]
 [&amp;lt;ffffffffa07e28c0&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
txg_sync      D 0000000000000000     0 48138      2 0x00000080
 ffff88000a53dba0 0000000000000046 0000000000000000 0000000000000000
 ffff88000a53db10 ffffffff81061d12 ffff88000a53db60 ffffffff810546b9
 ffff880024e73af8 ffff88000a53dfd8 000000000000fbc8 ffff880024e73af8
Call Trace:
 [&amp;lt;ffffffff81061d12&amp;gt;] ? default_wake_function+0x12/0x20
 [&amp;lt;ffffffff810546b9&amp;gt;] ? __wake_up_common+0x59/0x90
 [&amp;lt;ffffffff810a6d21&amp;gt;] ? ktime_get_ts+0xb1/0xf0
 [&amp;lt;ffffffff81529433&amp;gt;] io_schedule+0x73/0xc0
 [&amp;lt;ffffffffa02d841c&amp;gt;] cv_wait_common+0x8c/0x100 [spl]
 [&amp;lt;ffffffff8109afa0&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa02d84a8&amp;gt;] __cv_wait_io+0x18/0x20 [spl]
 [&amp;lt;ffffffffa041e0ab&amp;gt;] zio_wait+0xfb/0x1b0 [zfs]
 [&amp;lt;ffffffffa03b3be3&amp;gt;] dsl_pool_sync+0x2b3/0x3f0 [zfs]
 [&amp;lt;ffffffffa03cbe4b&amp;gt;] spa_sync+0x40b/0xa60 [zfs]
 [&amp;lt;ffffffff810546b9&amp;gt;] ? __wake_up_common+0x59/0x90
 [&amp;lt;ffffffffa03d8916&amp;gt;] txg_sync_thread+0x2e6/0x510 [zfs]
 [&amp;lt;ffffffff810591a9&amp;gt;] ? set_user_nice+0xc9/0x130
 [&amp;lt;ffffffffa03d8630&amp;gt;] ? txg_sync_thread+0x0/0x510 [zfs]
 [&amp;lt;ffffffffa02d3c2f&amp;gt;] thread_generic_wrapper+0x5f/0x70 [spl]
 [&amp;lt;ffffffffa02d3bd0&amp;gt;] ? thread_generic_wrapper+0x0/0x70 [spl]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
umount        D 0000000000000001     0 60033  60032 0x00000080
 ffff88001d44da98 0000000000000082 ffff88001d44d9f8 ffff8800182ec000
 ffffffffa151f501 0000000000000000 ffff880001cdb14c ffffffffa151f501
 ffff8800136525f8 ffff88001d44dfd8 000000000000fbc8 ffff8800136525f8
Call Trace:
 [&amp;lt;ffffffff81529ac2&amp;gt;] schedule_timeout+0x192/0x2e0
 [&amp;lt;ffffffff81083f30&amp;gt;] ? process_timeout+0x0/0x10
 [&amp;lt;ffffffffa14aeba6&amp;gt;] obd_exports_barrier+0xb6/0x190 [obdclass]
 [&amp;lt;ffffffffa0d8281f&amp;gt;] ofd_device_fini+0x5f/0x260 [ofd]
 [&amp;lt;ffffffffa14d0b62&amp;gt;] class_cleanup+0x552/0xd10 [obdclass]
 [&amp;lt;ffffffffa14b0b26&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
 [&amp;lt;ffffffffa14d330a&amp;gt;] class_process_config+0x1fea/0x27c0 [obdclass]
 [&amp;lt;ffffffffa0ef71b1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffffa14cc315&amp;gt;] ? lustre_cfg_new+0x435/0x630 [obdclass]
 [&amp;lt;ffffffffa14d3c01&amp;gt;] class_manual_cleanup+0x121/0x870 [obdclass]
 [&amp;lt;ffffffffa14b0b26&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
 [&amp;lt;ffffffffa150c457&amp;gt;] server_put_super+0xb37/0xe50 [obdclass]
 [&amp;lt;ffffffff8118b0cb&amp;gt;] generic_shutdown_super+0x5b/0xe0
 [&amp;lt;ffffffff8118b1b6&amp;gt;] kill_anon_super+0x16/0x60
 [&amp;lt;ffffffffa14d5e56&amp;gt;] lustre_kill_super+0x36/0x60 [obdclass]
 [&amp;lt;ffffffff8118b957&amp;gt;] deactivate_super+0x57/0x80
 [&amp;lt;ffffffff811ab35f&amp;gt;] mntput_no_expire+0xbf/0x110
 [&amp;lt;ffffffff811abeab&amp;gt;] sys_umount+0x7b/0x3a0
 [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;iotop shows that txg_sync running writes to disk and top shows this:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND                                         
48138 root       0 -20     0    0    0 R 64.4  0.0 102:13.51 txg_sync                                         
48060 root      39  19     0    0    0 S 41.2  0.0  64:16.60 z_null_iss/0                                     
48137 root       0 -20     0    0    0 S 23.9  0.0  37:43.30 txg_quiesce                                      
 9350 root      20   0     0    0    0 R  5.3  0.0   8:33.88 ll_ost01_004                                     
   22 root      20   0     0    0    0 S  0.3  0.0  36:24.79 kblockd/0                                        
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="104365" author="bzzz" created="Thu, 22 Jan 2015 19:33:32 +0000"  >&lt;p&gt;Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81061d12&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x12/0x20&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810546b9&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up_common+0x59/0x90&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a6d21&amp;gt;&amp;#93;&lt;/span&gt; ? ktime_get_ts+0xb1/0xf0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81529433&amp;gt;&amp;#93;&lt;/span&gt; io_schedule+0x73/0xc0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa02d841c&amp;gt;&amp;#93;&lt;/span&gt; cv_wait_common+0x8c/0x100 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109afa0&amp;gt;&amp;#93;&lt;/span&gt; ? autoremove_wake_function+0x0/0x40&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa02d84a8&amp;gt;&amp;#93;&lt;/span&gt; __cv_wait_io+0x18/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa041e0ab&amp;gt;&amp;#93;&lt;/span&gt; zio_wait+0xfb/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03b3be3&amp;gt;&amp;#93;&lt;/span&gt; dsl_pool_sync+0x2b3/0x3f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03cbe4b&amp;gt;&amp;#93;&lt;/span&gt; spa_sync+0x40b/0xa60 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810546b9&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up_common+0x59/0x90&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03d8916&amp;gt;&amp;#93;&lt;/span&gt; txg_sync_thread+0x2e6/0x510 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;so, it&apos;s still in DMU doing I/O ..&lt;/p&gt;</comment>
                            <comment id="104396" author="isaac" created="Thu, 22 Jan 2015 20:40:31 +0000"  >&lt;p&gt;Nathan, if iotop showed txg_sync issuing writes, it could be just wait for IO on the slow VM. It&apos;s quite often to see txg_sync to block for 10s of seconds waiting for IO to complete on VMs. The important thing is how long it keeps blocked there. If it blocks there for hours and there was no active IO going on, then it&apos;s an instance of the zio race we&apos;ve seen previously. Otherwise it could be just a red herring.&lt;/p&gt;

&lt;p&gt;Also, it&apos;s not clear whether ll_ost01_014 and ll_ost01_016 blocked before the txg_sync or after. In the case of &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/deca9712-9bc1-11e4-857a-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/deca9712-9bc1-11e4-857a-5254006e85c2&lt;/a&gt; the two ost threads blocked in similar places but far before the txg_sync thread was blocked.&lt;/p&gt;</comment>
                            <comment id="104398" author="isaac" created="Thu, 22 Jan 2015 20:43:37 +0000"  >&lt;p&gt;Alex, yes it&apos;s still in DMU doing I/O. But it&apos;s quite often to see txg_sync to block for 10s of seconds waiting for IO to complete on VMs. In the case of &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/deca9712-9bc1-11e4-857a-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/deca9712-9bc1-11e4-857a-5254006e85c2&lt;/a&gt; the two ost threads blocked in similar places but far before the txg_sync thread was blocked.&lt;/p&gt;</comment>
                            <comment id="104402" author="bzzz" created="Thu, 22 Jan 2015 20:48:05 +0000"  >&lt;p&gt;true, but then I&apos;d expect only those to be left at some point? while in all the cases we get that txg_sync() and another thread with zio_wait()?&lt;/p&gt;</comment>
                            <comment id="104408" author="bzzz" created="Thu, 22 Jan 2015 21:08:17 +0000"  >&lt;p&gt;would it be possible to get a crash dump?&lt;/p&gt;</comment>
                            <comment id="104629" author="isaac" created="Sun, 25 Jan 2015 06:34:34 +0000"  >&lt;p&gt;I just reproduced it and ll_ost00_017 was stuck in D state (umount ost1 also stuck):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[&amp;lt;ffffffffa011b415&amp;gt;] cv_wait_common+0x125/0x150 [spl]
[&amp;lt;ffffffffa011b495&amp;gt;] __cv_wait+0x15/0x20 [spl]
[&amp;lt;ffffffffa02215e3&amp;gt;] txg_wait_open+0xa3/0xf0 [zfs]
[&amp;lt;ffffffffa01e4939&amp;gt;] dmu_tx_wait+0x389/0x390 [zfs]
[&amp;lt;ffffffffa01e49e9&amp;gt;] dmu_tx_assign+0xa9/0x520 [zfs]
[&amp;lt;ffffffffa0e6d5ad&amp;gt;] osd_trans_start+0xed/0x430 [osd_zfs]
[&amp;lt;ffffffffa0fb907c&amp;gt;] ofd_trans_start+0x7c/0x100 [ofd]
[&amp;lt;ffffffffa0fbb530&amp;gt;] ofd_object_destroy+0x3d0/0x8e0 [ofd]
[&amp;lt;ffffffffa0fb579d&amp;gt;] ofd_destroy_by_fid+0x35d/0x620 [ofd]
[&amp;lt;ffffffffa0faf0ea&amp;gt;] ofd_destroy_hdl+0x2fa/0xb60 [ofd]
[&amp;lt;ffffffffa09dbd9e&amp;gt;] tgt_request_handle+0x8be/0x1000 [ptlrpc]
[&amp;lt;ffffffffa098b891&amp;gt;] ptlrpc_main+0xe41/0x1960 [ptlrpc]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;However, it was actually running in D rather than sleeping, using lots of CPU time:&lt;br/&gt;
 8058 root      20   0     0    0    0 D  8.3  0.0  68:01.97 ll_ost00_017&lt;/p&gt;

&lt;p&gt;So it looked like cv_wait_common() was actually spinning on a damaged lock. Also the txgs of both ost pools were moving forward so it&apos;s not the txg_sync hang we&apos;ve seen previously.&lt;/p&gt;

&lt;p&gt;I&apos;ve never seen any report like this on ZoL, so I&apos;d tend to think it&apos;s related to the way we use ZoL. I wonder whether it could be some shutdown race where umount process had freed some resource that ll_ost00_017 was trying to use, then cv_wait_common() just got stuck on a damaged lock. The umount stack:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[&amp;lt;ffffffffa0724bd6&amp;gt;] obd_exports_barrier+0xb6/0x190 [obdclass]
[&amp;lt;ffffffffa0fa684f&amp;gt;] ofd_device_fini+0x5f/0x260 [ofd]
[&amp;lt;ffffffffa0746072&amp;gt;] class_cleanup+0x552/0xd10 [obdclass]
[&amp;lt;ffffffffa074881a&amp;gt;] class_process_config+0x1fea/0x27c0 [obdclass]
[&amp;lt;ffffffffa0749111&amp;gt;] class_manual_cleanup+0x121/0x870 [obdclass]
[&amp;lt;ffffffffa07819d7&amp;gt;] server_put_super+0xb37/0xe50 [obdclass]
[&amp;lt;ffffffff8118b61b&amp;gt;] generic_shutdown_super+0x5b/0xe0
[&amp;lt;ffffffff8118b706&amp;gt;] kill_anon_super+0x16/0x60
[&amp;lt;ffffffffa074b366&amp;gt;] lustre_kill_super+0x36/0x60 [obdclass]
[&amp;lt;ffffffff8118bea7&amp;gt;] deactivate_super+0x57/0x80
[&amp;lt;ffffffff811ab8af&amp;gt;] mntput_no_expire+0xbf/0x110
[&amp;lt;ffffffff811ac3fb&amp;gt;] sys_umount+0x7b/0x3a0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It appeared that class_cleanup()=&amp;gt;obd_precleanup() has been called, and if ldto_device_fini() was called as well, i.e. osd_device_fini() was called and dmu_objset_disown(o-&amp;gt;od_os) called. At the same time, osd_trans_start() could still access od_os from dmu_tx_assign(oh-&amp;gt;ot_tx, TXG_WAIT).&lt;/p&gt;

&lt;p&gt;If there could be a race between osd_device_fini() and osd_trans_start(), that&apos;d explain what I see here. Comments very welcomed.&lt;/p&gt;</comment>
                            <comment id="104630" author="isaac" created="Sun, 25 Jan 2015 06:44:46 +0000"  >&lt;p&gt;Also, the pool that umount was trying to umount seemed to be messed up:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;cat lustre-ost1/txgs; sleep 1; cat lustre-ost1/txgs
469 0 0x01 3 336 31269437119932 94328219768999
txg      birth            state ndirty       nread        nwritten     reads    writes   otime        qtime        wtime        stime       
504280372 94327859132340   S     0            0            0            0        0        52962        5155         34018        0           
504280373 94327859185302   W     0            0            0            0        0        42311        5308         0            0           
504280374 94327859227613   O     0            0            0            0        0        0            0            0            0           
469 0 0x01 3 336 31269437119932 94329224682995
txg      birth            state ndirty       nread        nwritten     reads    writes   otime        qtime        wtime        stime       
504282558 94329223042758   S     0            0            0            0        0        819957       7673         68244        0           
504282559 94329223862715   W     0            0            0            0        0        119631       5697         0            0           
504282560 94329223982346   O     0            0            0            0        0        0            0            0            0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The TXG # increased by 2186 in just 1 second - at the same time, LNet sent/got no message and all OST threads were idle - very weird. Again, no similar report was found on ZoL. Likely we&apos;re doing something wrong here.&lt;/p&gt;</comment>
                            <comment id="104631" author="bzzz" created="Sun, 25 Jan 2015 07:58:44 +0000"  >&lt;p&gt;class_cleanup() calls class_disconnect_exports() before going into precleanup process. so all the exports should be disconnected, then obd_exports_barrier() is called to ensure all exports are gone. and only after that in ofd_stack_fini() we disconnect from OSD so OSD can start to umount. I think there should be no racing dmu_objset_disown() here. also notice that dsl_dataset_disown() doesn&apos;t really do anything like &quot;umount&quot;, it just marks the objset as &quot;not used&quot; while all txg logic is driven a level higher - as a whole pool.&lt;/p&gt;</comment>
                            <comment id="104655" author="adilger" created="Mon, 26 Jan 2015 01:14:57 +0000"  >&lt;p&gt;Brian and Olaf were looking at this bug on Friday and was going to post, but since he didn&apos;t get a chance to do so yet I&apos;ll recap their findings as best I can. When ZFS is compiled with ASSERT checks enabled there is a test failure around sanity subtest 100 that shows a bonus buffer being modified without a lock held on it. Judging by the stack traces in test_116, this shows a possible root cause for the stuck threads and later unmount failure. &lt;/p&gt;</comment>
                            <comment id="104658" author="adilger" created="Mon, 26 Jan 2015 01:50:15 +0000"  >&lt;p&gt;Brian filed that as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6155&quot; title=&quot;osd_count_not_mapped() calls dbuf_hold_impl() without the lock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6155&quot;&gt;&lt;del&gt;LU-6155&lt;/del&gt;&lt;/a&gt; but it is worthwhile investigating as the root cause of this bug also, because this systematically hits only in sanity and not randomly in other tests. &lt;/p&gt;</comment>
                            <comment id="104723" author="isaac" created="Mon, 26 Jan 2015 18:46:07 +0000"  >&lt;p&gt;I had debugging on for SPL/ZFS, but was able to hit the hang without hitting any ZFS assertion.&lt;/p&gt;

&lt;p&gt;I checked the kernel hung task watcher code and it looked like it&apos;d not warn for tasks running in D state. The ll_ost00_017 thread likely hung on txg_wait_open() long before the umount started, judging from the CPU time it had used. That&apos;d also explain why there were two other OST threads hanging in osd_object_write_lock() but there was no error message for any thread blocked holding the lock.&lt;/p&gt;

&lt;p&gt;I&apos;ll try to narrow down to subtests earlier than 133. Would it be possible to force OST umount/remount after each test? That&apos;d pinpoint exactly when SHTF.&lt;/p&gt;</comment>
                            <comment id="104806" author="adilger" created="Tue, 27 Jan 2015 02:50:25 +0000"  >&lt;p&gt;Isaac, how recent was your testing?  I wonder if we were hitting multiple different problems here, maybe one was fixed with 0.6.3-1.2 and there is a second problem causing the threads to be stuck and dump in test_116?&lt;/p&gt;

&lt;p&gt;Unfortunately, Nathaniel already ran tests with an unmount after every subtest (see his debug patch earlier in this bug) but AFAIK this didn&apos;t find the problem. That said, I didn&apos;t check the results of this patch myself.  It may be that unmounting after every subtest didn&apos;t allow memory pressure to build up and hit the problem.&lt;/p&gt;

&lt;p&gt;I also had a patch on this bug to disable test_116 to see if it was the source of the problem, since it runs more than 120 seconds itself and the watchdogs could be triggered by hangs after the start of the test.  However, the stack traces moved to test_118, but at least gave me a bit better idea where the stuck threads first got stuck. It looks like it was around test_103 or so, but it is hard to know because the console messages do not have any timestamps.&lt;/p&gt;

&lt;p&gt;It might be more productive to try a few patches to disable groups of subtests around 100 to 103, each with Test-Parameters that have 6-10 sanity runs to have a high confidence that the problem is caught. &lt;/p&gt;</comment>
                            <comment id="104816" author="isaac" created="Tue, 27 Jan 2015 04:14:51 +0000"  >&lt;p&gt;The one where txg_sync hanged in D state while there&apos;s no pending IO was fixed - I was able to hit it often before 0.6.3-1.2 but not once after. Now I&apos;m seeing one OST thread blocked in D state in txg_wait_open() forever, while txg_sync was busy doing IO and TXGs of the pool were moving forward.&lt;/p&gt;

&lt;p&gt;I&apos;m now testing with a temporary workaround for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6155&quot; title=&quot;osd_count_not_mapped() calls dbuf_hold_impl() without the lock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6155&quot;&gt;&lt;del&gt;LU-6155&lt;/del&gt;&lt;/a&gt;. I was able to reproduce it roughly 1 out of 4 sanity runs, so we&apos;ll soon know whether &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6155&quot; title=&quot;osd_count_not_mapped() calls dbuf_hold_impl() without the lock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6155&quot;&gt;&lt;del&gt;LU-6155&lt;/del&gt;&lt;/a&gt; is to blame or not.&lt;/p&gt;</comment>
                            <comment id="104850" author="isaac" created="Tue, 27 Jan 2015 16:37:19 +0000"  >&lt;p&gt;The &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6155&quot; title=&quot;osd_count_not_mapped() calls dbuf_hold_impl() without the lock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6155&quot;&gt;&lt;del&gt;LU-6155&lt;/del&gt;&lt;/a&gt; workaround didn&apos;t work, hit the same hang 1 out of 6 sanity runs.&lt;/p&gt;</comment>
                            <comment id="104919" author="utopiabound" created="Tue, 27 Jan 2015 22:06:14 +0000"  >&lt;p&gt;Attempting to bisect cause, umount/mount of osts works prior to test 50 and fails prior to test 100:&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/13130/7&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/13130/7&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/e8dac624-9ce2-11e4-b50c-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/e8dac624-9ce2-11e4-b50c-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="105277" author="isaac" created="Fri, 30 Jan 2015 22:16:43 +0000"  >&lt;p&gt;I wrote a script to watch for stuck OSS threads and found in one failed test:&lt;br/&gt;
1. At test 56w about 02:52, the OSS began to get busy, with threads stuck waiting for IO in txg_wait_synced(). But threads would still recover and go on.&lt;br/&gt;
2. At test 79 about 03:25, one OSS thread (21277) got stuck in txg_wait_open() and it never recovered. It was in D state but actually running, which was why the kernel hung task watcher never warned about it.&lt;br/&gt;
3. At test 103b about 03:40, OST thread 21285 got stuck in ofd_object_destroy()=&amp;gt;osd_object_write_lock()=&amp;gt;call_rwsem_down_write_failed() and never recovered, at the same time OSS dmesg showed:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: == sanity test 103b: MDS mount option &apos;noacl&apos; == 03:40:06 (1422618006)                                   
LustreError: 11-0: lustre-MDT0000-lwp-OST0001: operation obd_ping to node 10.100.4.147@tcp failed: rc = -107                   
Lustre: lustre-MDT0000-lwp-OST0001: Connection to lustre-MDT0000 (at 10.100.4.147@tcp) was lost; in progress operations using this service will wait for recovery to complete
Lustre: Skipped 1 previous similar message
Lustre: 9624:0:(client.c:1942:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1422618009/real 1422618009]  req@ffff88006f83ccc0 x1491657489867716/t0(0) o400-&amp;gt;MGC10.100.4.147@tcp@10.100.4.147@tcp:26/25 lens 224/224 e 0 to 1 dl 1422618016 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
Lustre: 9624:0:(client.c:1942:ptlrpc_expire_one_request()) Skipped 1 previous similar message                                  
LustreError: 166-1: MGC10.100.4.147@tcp: Connection to MGS (at 10.100.4.147@tcp) was lost; in progress operations using this service will fail
Lustre: 9623:0:(client.c:1942:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1422618016/real 1422618016]  req@ffff88003e42e680 x1491657489867740/t0(0) o250-&amp;gt;MGC10.100.4.147@tcp@10.100.4.147@tcp:26/25 lens 400/544 e 0 to 1 dl 1422618022 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1                                                                             
Lustre: Evicted from MGS (at 10.100.4.147@tcp) after server handle changed from 0x9f6377cf69c91a9b to 0x9f6377cf6a97ffe3       
Lustre: MGC10.100.4.147@tcp: Connection restored to MGS (at 10.100.4.147@tcp)                                                  
Lustre: Skipped 1 previous similar message                                                                                     
Lustre: 9623:0:(client.c:1942:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1422618016/real 1422618016]  req@ffff88005febe9c0 x1491657489867744/t0(0) o38-&amp;gt;lustre-MDT0000-lwp-OST0001@10.100.4.147@tcp:12/10 lens 400/544 e 0 to 1 dl 1422618027 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1                                                                       
Lustre: lustre-OST0001: deleting orphan objects from 0x0:162984 to 0x0:163009                                                  
Lustre: lustre-OST0000: deleting orphan objects from 0x0:155843 to 0x0:155873                                                  
LustreError: 11-0: MGC10.100.4.147@tcp: operation obd_ping to node 10.100.4.147@tcp failed: rc = -107                          
LustreError: Skipped 1 previous similar message
LustreError: 166-1: MGC10.100.4.147@tcp: Connection to MGS (at 10.100.4.147@tcp) was lost; in progress operations using this service will fail
LustreError: 14122:0:(mgc_request.c:526:do_requeue()) failed processing log: -5                                                
LustreError: 14122:0:(mgc_request.c:526:do_requeue()) Skipped 7 previous similar messages                                      
Lustre: Evicted from MGS (at 10.100.4.147@tcp) after server handle changed from 0x9f6377cf6a97ffe3 to 0x9f6377cf6a980275
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;4. Later at 133g umount would block forever&lt;/p&gt;

&lt;p&gt;I tend to think the hang thread in txg_wait_open() was the real cause, and the later hang threads in osd_object_write_lock() a consequence of it:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;The txg_wait_open() hang always preceded the osd_object_write_lock() hang.&lt;/li&gt;
	&lt;li&gt;Stack trace showed that the txg_wait_open() thread might hold a lock which the later threads tried to get:
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;PID: 21277  TASK: ffff88006130c040  CPU: 0   COMMAND: &quot;ll_ost00_007&quot;
 #0 [ffff88003ed5b880] schedule at ffffffff815296a0
 #1 [ffff88003ed5b948] cv_wait_common at ffffffffa011b415 [spl]
 #2 [ffff88003ed5b9c8] __cv_wait at ffffffffa011b495 [spl]
 #3 [ffff88003ed5b9d8] txg_wait_open at ffffffffa02215e3 [zfs]
 #4 [ffff88003ed5ba18] dmu_tx_wait at ffffffffa01e4939 [zfs]
 #5 [ffff88003ed5ba78] dmu_tx_assign at ffffffffa01e49e9 [zfs]
 #6 [ffff88003ed5bb28] osd_trans_start at ffffffffa0dd45ad [osd_zfs]
 #7 [ffff88003ed5bb58] ofd_trans_start at ffffffffa0f1f07c [ofd]
 #8 [ffff88003ed5bb88] ofd_object_destroy at ffffffffa0f21530 [ofd]
 #9 [ffff88003ed5bbd8] ofd_destroy_by_fid at ffffffffa0f1b79d [ofd]
#10 [ffff88003ed5bcd8] ofd_destroy_hdl at ffffffffa0f150ea [ofd]

PID: 21278  TASK: ffff88003e0e7500  CPU: 1   COMMAND: &quot;ll_ost00_008&quot;
 #0 [ffff880062b979b0] schedule at ffffffff815296a0
 #1 [ffff880062b97a78] rwsem_down_failed_common at ffffffff8152bd65
 #2 [ffff880062b97ad8] rwsem_down_write_failed at ffffffff8152bec3
 #3 [ffff880062b97b18] call_rwsem_down_write_failed at ffffffff8128fbe3
 #4 [ffff880062b97b78] osd_object_write_lock at ffffffffa0ddc2fb [osd_zfs]
 #5 [ffff880062b97b88] ofd_object_destroy at ffffffffa0f211c6 [ofd]
 #6 [ffff880062b97bd8] ofd_destroy_by_fid at ffffffffa0f1b79d [ofd]
 #7 [ffff880062b97cd8] ofd_destroy_hdl at ffffffffa0f150ea [ofd]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
	&lt;li&gt;I ran &quot;sanity --start-at 100 --stop-at 140&quot; for more than 60 times without reproducing the hang, so the cause should precede test 100.&lt;/li&gt;
	&lt;li&gt;This is also in line with Nathaniel&apos;s observation that &quot;umount/mount of osts works prior to test 50 and fails prior to test 100&quot;.&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="105309" author="isaac" created="Sun, 1 Feb 2015 16:21:01 +0000"  >&lt;p&gt;Now I can reproduce the txg_wait_open() hang with &quot;sanity --only 78,79,80&quot;. It seemed to be the smallest subset of tests to trigger it - I haven&apos;t been able to reproduce it if any one test is removed. Once a thread got stuck in txg_wait_open(), umount on OSS would block forever just like at the end of 133g.&lt;/p&gt;</comment>
                            <comment id="105335" author="isaac" created="Mon, 2 Feb 2015 06:58:58 +0000"  >&lt;p&gt;I found a possible bug &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6195&quot; title=&quot;osd-zfs: osd_declare_object_destroy() calls dmu_tx_hold_zap() with wrong keys&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6195&quot;&gt;&lt;del&gt;LU-6195&lt;/del&gt;&lt;/a&gt; in osd_declare_object_destroy() while looking at the code today, although I haven&apos;t fully understood its consequences to determine whether it could have something to do with this bug.&lt;/p&gt;</comment>
                            <comment id="105370" author="behlendorf" created="Mon, 2 Feb 2015 17:05:28 +0000"  >&lt;p&gt;If you&apos;re able to reproduce this bug it might be useful to check the following proc variables.&lt;/p&gt;

&lt;p&gt;First I&apos;d look at dmu_tx, it shows a list of counters which indicate why a TX can&apos;t be assigned to a TXG.  From the stacks which have been posted it looks like dmu_tx_assign() may just be failing repeatedly (maybe an incorrectly constructed TX?) and thus blocking in dmu_tx_wait() forever.  Knowing why would help narrow things down.  On a healthy system you should see almost everything in dmu_tx_assigned (success), the other common cases are dmu_tx_dirty_throttle and dmu_tx_dirty_delay.  These indicate that the txg_sync thread can&apos;t write data as fast as the processes are creating it and it&apos;s injecting a delay to slow them down to keep things in balance.  If you see more than a handful of something else it should be investigated.&lt;/p&gt;

&lt;p&gt;The other thing I&apos;d look at is /proc/spl/kstat/zfs/&amp;lt;pool&amp;gt;/txgs.  This contains a history of the last N TXGs which were committed.  How long they took, how much IO was performed, how many IOPs were issued, etc.  To enable this you&apos;ll need to set the history size using the zfs_txg_history module parameter, by default it&apos;s disabled.  But if you want to log the last say 10 TXGs set it to 10.  In particular, I&apos;m curious if TXGs continue to roll forward at the usual rate while this thread is stuck (it sounds like it).  If so then the TXG engine itself would seem to be working fine and there&apos;s just something wrong with the specific TX which is blocked.&lt;/p&gt;</comment>
                            <comment id="105371" author="bzzz" created="Mon, 2 Feb 2015 17:09:39 +0000"  >&lt;p&gt;well, we&apos;re supposed to get a message in that case:&lt;/p&gt;

&lt;p&gt;	rc = &lt;del&gt;dmu_tx_assign(oh&lt;/del&gt;&amp;gt;ot_tx, TXG_WAIT);&lt;br/&gt;
	if (unlikely(rc != 0)) &lt;/p&gt;
{
		struct osd_device *osd = osd_dt_dev(d);
		/* dmu will call commit callback with error code during abort */
		if (!lu_device_is_md(&amp;amp;d-&amp;gt;dd_lu_dev) &amp;amp;&amp;amp; rc == -ENOSPC)
			CERROR(&quot;%s: failed to start transaction due to ENOSPC. &quot;
			       &quot;Metadata overhead is underestimated or &quot;
			       &quot;grant_ratio is too low.\n&quot;, osd-&amp;gt;od_svname);
		else
			CERROR(&quot;%s: can&apos;t assign tx: rc = %d\n&quot;,
			       osd-&amp;gt;od_svname, rc);
	}
</comment>
                            <comment id="105376" author="behlendorf" created="Mon, 2 Feb 2015 17:27:28 +0000"  >&lt;p&gt;Well not exactly.  When calling dmu_tx_assign() with TXG_WAIT it should only fail in the case where we&apos;re over quota.  For all other errors it will retry and we should be assigned to the next TXG (assuming the TX is sane).  So checking the dmu_tx proc file would show if we&apos;re hitting any of the ERESTART cases.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;/*
 * Assign tx to a transaction group.  txg_how can be one of:
 *
 * (1)  TXG_WAIT.  If the current open txg is full, waits until there&apos;s
 *      a new one.  This should be used when you&apos;re not holding locks.
 *      It will only fail if we&apos;re truly out of space (or over quota).
 *
 * (2)  TXG_NOWAIT.  If we can&apos;t assign into the current open txg without
 *      blocking, returns immediately with ERESTART.  This should be used
 *      whenever you&apos;re holding locks.  On an ERESTART error, the caller
 *      should drop locks, do a dmu_tx_wait(tx), and try again.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="105397" author="isaac" created="Mon, 2 Feb 2015 18:42:56 +0000"  >&lt;p&gt;Brian, you nailed it! Thanks a lot!&lt;/p&gt;

&lt;p&gt;Every time the hang happened, the TXGs would move forward, but unusually fast:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat lustre-ost1/txgs; sleep 1; cat lustre-ost1/txgs 
25 0 0x01 3 336 244675310220 123698699004892
txg      birth            state ndirty       nread        nwritten     reads    writes   otime        qtime        wtime        stime       
1010319525 123698585210993  S     0            0            0            0        0        84246        3380         33414        0           
1010319526 123698585295239  W     0            0            0            0        0        36794        2496         0            0           
1010319527 123698585332033  O     0            0            0            0        0        0            0            0            0           
25 0 0x01 3 336 244675310220 123699705972680
txg      birth            state ndirty       nread        nwritten     reads    writes   otime        qtime        wtime        stime       
1010324666 123699503872529  S     0            0            0            0        0        111328       5851         53035        0           
1010324667 123699503983857  W     0            0            0            0        0        59840        6406         0            0           
1010324668 123699504043697  O     0            0            0            0        0        0            0            0            0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;In this case, 5141 in one second.&lt;/p&gt;

&lt;p&gt;Here&apos;s dmu_tx:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat dmu_tx 
4 1 0x01 11 528 3293147069 124044054813482
name                            type data
dmu_tx_assigned                 4    32644
dmu_tx_delay                    4    0
dmu_tx_error                    4    0
dmu_tx_suspended                4    0
dmu_tx_group                    4    0
dmu_tx_memory_reserve           4    1012288401
dmu_tx_memory_reclaim           4    0
dmu_tx_dirty_throttle           4    0
dmu_tx_dirty_delay              4    0
dmu_tx_dirty_over_max           4    0
dmu_tx_quota                    4    0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And &lt;em&gt;dmu_tx_memory_reserve&lt;/em&gt; is still rapidly growing even though there&apos;s now no IO from Lustre at all:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# grep dmu_tx_memory_reserve dmu_tx; sleep 1; grep dmu_tx_memory_reserve dmu_tx;
dmu_tx_memory_reserve           4    1012794812
dmu_tx_memory_reserve           4    1012800331
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;So the counter grew by 5519 in a second, roughly the same as the increase in TXG #. Looks like that&apos;s why the thread is still hanging in txg_wait_open().&lt;/p&gt;

&lt;p&gt;The only place where that counter gets bumped is dmu_tx_try_assign()=&amp;gt;dsl_dir_tempreserve_space()=&amp;gt;arc_tempreserve_space(). So I increased zfs_arc_max by about 500M, waited for the arc_adapt thread to pick up the change, and then a few seconds later the thread recovered from the hang and everything looked normal! Even umount worked!&lt;/p&gt;

&lt;p&gt;Before I increased zfs_arc_max, there was about 800M available from arc_sz to arc_c, and the hanged tx was only trying to remove one object (and update OI and accounting ZAPs) so clearly something was wrong with that TX (maybe &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6195&quot; title=&quot;osd-zfs: osd_declare_object_destroy() calls dmu_tx_hold_zap() with wrong keys&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6195&quot;&gt;&lt;del&gt;LU-6195&lt;/del&gt;&lt;/a&gt;). I&apos;ll find out.&lt;/p&gt;</comment>
                            <comment id="105415" author="behlendorf" created="Mon, 2 Feb 2015 19:40:49 +0000"  >&lt;p&gt;Whoa, that&apos;s definitely going to be a problem!  It sounds like Lustre constructed a massive TX relative to the target ARC size and therefore could never assign it to a TXG.  Something like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6195&quot; title=&quot;osd-zfs: osd_declare_object_destroy() calls dmu_tx_hold_zap() with wrong keys&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6195&quot;&gt;&lt;del&gt;LU-6195&lt;/del&gt;&lt;/a&gt; could cause this with an incorrect hold.  You might want to build ZFS with --enable-debug-dmu-tx, this will enable some additional statistics about the TX and some more sanity checking.&lt;/p&gt;</comment>
                            <comment id="105417" author="bzzz" created="Mon, 2 Feb 2015 19:46:28 +0000"  >&lt;p&gt;iirc, we got -ENOSPC from dmu_tx_assign() if TX is too big. for example, when we tried to destroy a file with lots of stripes.&lt;/p&gt;</comment>
                            <comment id="105429" author="behlendorf" created="Mon, 2 Feb 2015 20:54:03 +0000"  >&lt;p&gt;Alex that can happen (along with EDQUOT) if the pool is low on space but it&apos;s not guaranteed to happen even for very large TXs.  It&apos;s entirely possible that you&apos;ll fall in to this ERESTART retry loop first.  The DMU expects that individual TXs will be relatively small (~10MB at most) but that a TXG may contain many TXs.  If Lustre is creating TXs much larger than this I can see how this deadlock could happen.  It would be best if we could put an upper bound of the size of a Lustre TX.&lt;/p&gt;</comment>
                            <comment id="105445" author="adilger" created="Mon, 2 Feb 2015 23:26:46 +0000"  >&lt;p&gt;Kudos to everyone for the excellent work in tracking down this tricky problem.  Nathaniel, while a solution is being working on, could you please submit a patch to sanity to re-enable subtest 132 and exclude subtests 78, 79, and 80 for ZFS OSTs with a &lt;tt&gt;Test-Parameters:&lt;/tt&gt; line that runs sanity enough times to be confident of the result (maybe 6-10 times)?  That would allow us to start enforcing the review-zfs test results while the fix for this new issue is prepared, and ensure we don&apos;t regress in other areas in the meantime.  Excluding sanity test_133g failures we would have passed 27 of 30 recent review-zfs test sessions (one failed due to a problem in the patch, one failed due to timeouts in sanity-quota and ost-pools, and one had an additional failure in sanity-quota ).&lt;/p&gt;

&lt;p&gt;There is a similar check in ext4 to limit the size of a single transaction handle to be &amp;lt; 1/4 of the total journal size.  I don&apos;t think it unreasonable to put some upper bound on the size of a single ZFS tx, but what that size should be is unclear.  I recall the 10MB tx limit also, but I think it was just recently increased?  Related to this, llog updates (appends) they may be quite large &lt;em&gt;in theory&lt;/em&gt; but in practice will only affect a small number of blocks, and since an unlink of a widely-striped file may update a lot of llog files (up to 2000 stripes) it could cause very large theoretical transaction sizes that aren&apos;t seen in practice, so we don&apos;t want to cause them to gratuitously fail due to this check.  That is what &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2160&quot; title=&quot;Implement ZFS dmu_tx_hold_append() declarations for llog &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2160&quot;&gt;LU-2160&lt;/a&gt; &quot;Implement ZFS dmu_tx_hold_append() declarations for llog&quot; is about, but that hasn&apos;t been implemented yet.&lt;/p&gt;</comment>
                            <comment id="105449" author="isaac" created="Mon, 2 Feb 2015 23:37:34 +0000"  >&lt;p&gt;Working on the patch to disable 78, 79, and 80...&lt;/p&gt;</comment>
                            <comment id="105452" author="gerrit" created="Mon, 2 Feb 2015 23:46:08 +0000"  >&lt;p&gt;Isaac Huang (he.huang@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13600&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13600&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; osd-zfs: umount hang in sanity 133g&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: f45c320eeeeaff849ba2e97da0f6fe5185d1e149&lt;/p&gt;</comment>
                            <comment id="105463" author="isaac" created="Tue, 3 Feb 2015 01:19:42 +0000"  >&lt;p&gt;I just reproduced it again (with a &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6195&quot; title=&quot;osd-zfs: osd_declare_object_destroy() calls dmu_tx_hold_zap() with wrong keys&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6195&quot;&gt;&lt;del&gt;LU-6195&lt;/del&gt;&lt;/a&gt; workaround, so that&apos;s ruled out), and I found one fishy object on the OST:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# zdb -e -ddddd lustre-ost2/ost2 195
    Object  lvl   iblk   dblk  dsize  lsize   %full  type
       195    5    16K   128K  1.01M  1.00T    0.00  ZFS plain file
Indirect blocks:
               0 L4     0:54a00:400 4000L/400P F=8 B=513/513
     10000000000  L3    0:54600:400 4000L/400P F=8 B=513/513
     10000000000   L2   0:54200:400 4000L/400P F=8 B=513/513
     10000000000    L1  0:4fe00:400 4000L/400P F=8 B=513/513
     10000000000     L0 0:76aa00:20000 20000L/20000P F=1 B=513/513
     10000020000     L0 0:78aa00:20000 20000L/20000P F=1 B=513/513
     10000040000     L0 0:7aaa00:20000 20000L/20000P F=1 B=513/513
     10000060000     L0 0:7caa00:20000 20000L/20000P F=1 B=513/513
     10000080000     L0 0:7eaa00:20000 20000L/20000P F=1 B=513/513
     100000a0000     L0 0:80aa00:20000 20000L/20000P F=1 B=513/513
     100000c0000     L0 0:82aa00:20000 20000L/20000P F=1 B=513/513
     100000e0000     L0 0:84aa00:20000 20000L/20000P F=1 B=513/513

                segment [0000010000000000, 0000010000100000) size    1M
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;An object with only 4 indirect blocks, despite its lsize of 1T. I &lt;b&gt;guessed&lt;/b&gt; it&apos;s the object the hung OST thread was trying to remove. The arc_c was 938M when it hung, then I gradually increased it and the hang disappeared when arc_c was increased to 1.0G. Then that object also disappeared:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# zdb -e -ddddd lustre-ost2/ost2 195
    Object  lvl   iblk   dblk  dsize  lsize   %full  type
zdb: dmu_bonus_hold(195) failed, errno 2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt; So my guess was right: the thread tried to remove the object with 1T lsize, and dmu_tx_hold_free() estimated the memory overhead of freeing the object to be about 1G, then dmu_tx_assign() hung in dmu_tx_try_assign()=&amp;gt;dsl_dir_tempreserve_space()=&amp;gt;arc_tempreserve_space(). I also kept watching &lt;em&gt;arcsz&lt;/em&gt;, and it grew by only a few MBs (it never grew over 60M).&lt;/p&gt;

&lt;p&gt;So it looked like dmu_tx_count_free() severely over-estimated txh_memory_tohold for dmu_tx_hold_free().&lt;/p&gt;</comment>
                            <comment id="105466" author="bzzz" created="Tue, 3 Feb 2015 01:42:26 +0000"  >&lt;p&gt;cool catch! I guess it should be possible to reproduce this with a regular truncate to, say 2-4TB ?&lt;/p&gt;</comment>
                            <comment id="105467" author="bzzz" created="Tue, 3 Feb 2015 01:58:18 +0000"  >&lt;p&gt;also, would it make sense to have our own copy of dmu_tx_assign() with some checks like if we&apos;re trying for too long, then print an error?&lt;/p&gt;</comment>
                            <comment id="105487" author="adilger" created="Tue, 3 Feb 2015 10:15:14 +0000"  >&lt;p&gt;This looks like it is caused by sanity.sh test_80 &quot;Page eviction is equally fast at high offsets too&quot;, which writes 1MB at a 1TB offset in a sparse file to test client page cache truncation performance.  I guess it is also testing OSD-side block truncation...&lt;/p&gt;

&lt;p&gt;I don&apos;t think making our own version of dmu_tx_assign() is the right solution, but rather fixing the upstream ZFS code.  I&apos;m not sure if this should fail outright (leaving a user with an unremovable object?) or only print an error that the arc_c limit should be increased (preferably with some advice on how large it needs to be).  &lt;/p&gt;

&lt;p&gt;As for the proper solution, I&apos;d expect that the DMU code shouldn&apos;t be so pessimistic about TX size when deleting this sparse file.  I&apos;d imagine it only really needs a few hundred KB of dirty blocks, so maybe it needs a bit better accounting?  Is it accounting for a dense file and not realizing the file is sparse?  Is it possible to short-circuit all of the calculations in &lt;tt&gt;dmu_tx_count_free()&lt;/tt&gt; by checking the allocated block count from the dnode (&lt;tt&gt;dn_used&lt;/tt&gt;?), or is that a layering violation?&lt;/p&gt;</comment>
                            <comment id="105489" author="bzzz" created="Tue, 3 Feb 2015 11:06:33 +0000"  >&lt;p&gt;I agree that the proper fix can be done only in DMU. but there might be another similar overestimation leading to a livelock in some corner cases which will be hard to recognize given it&apos;s a silent loop within dmu_tx_assign().&lt;/p&gt;</comment>
                            <comment id="105511" author="isaac" created="Tue, 3 Feb 2015 16:59:36 +0000"  >&lt;p&gt;Last night I changed the seek=1M to seek=5K in test_80, and so far I&apos;ve run 5 iterations of sanity and 64 iterations of &quot;sanity --only 78,79,80&quot; without reproducing it. So it looks like we can work around it by reducing seek in test_80 (probably test_34g as well) without skipping any sub-test, while working on a proper fix.&lt;/p&gt;

&lt;p&gt;As to the fix:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;In arc_tempreserve_space():
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;        if (reserve &amp;gt; arc_c/4 &amp;amp;&amp;amp; !arc_no_grow)
                arc_c = MIN(arc_c_max, reserve * 4);

        /*
         * Throttle when the calculated memory footprint for the TXG
         * exceeds the target ARC size.
         */
        if (reserve &amp;gt; arc_c) {                            
                DMU_TX_STAT_BUMP(dmu_tx_memory_reserve);
                return (SET_ERROR(ERESTART));
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
	&lt;ul&gt;
		&lt;li&gt;An error/debug message here would help.&lt;/li&gt;
		&lt;li&gt;Also I think the code should be changed to:
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;        if (reserve &amp;gt; arc_c) {                                                              
                return (SET_ERROR(ENOMEM));
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The code just tried to grow ARC to the max possible in &quot;arc_c = MIN(arc_c_max, reserve * 4);&quot;, and if that&apos;s still not enough ERESTART doesn&apos;t seem to be the right choice - unless the admin would show up and bump zfs_arc_max, which in my opinion isn&apos;t a case for ERESTART.&lt;/p&gt;&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;The dmu_tx_count_free() caused problems in the past with sparse file and was &lt;em&gt;improved&lt;/em&gt; to be less over-estimating:&lt;br/&gt;
&lt;a href=&quot;https://github.com/zfsonlinux/zfs/commit/ff80d9b142826c15fa84e3c4b9ef1795fd6e9485&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/zfsonlinux/zfs/commit/ff80d9b142826c15fa84e3c4b9ef1795fd6e9485&lt;/a&gt;&lt;br/&gt;
Looks like there&apos;s more work there.&lt;/li&gt;
	&lt;li&gt;Investigate whether to limit the size of a single TX. The ZPL does exactly this in zfs_write():
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;int             max_blksz = zsb-&amp;gt;z_max_blksz;
......
                dmu_tx_hold_write(tx, zp-&amp;gt;z_id, woff, MIN(n, max_blksz));
......
                /*        
                 * XXX - should we really limit each write to z_max_blksz?
                 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
                 */
                nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="105512" author="bzzz" created="Tue, 3 Feb 2015 17:05:11 +0000"  >&lt;p&gt;take into account that with wide-striping we may need to modify up to ~2K objects (and each llog is modified twice: 8K header and record itself).&lt;/p&gt;</comment>
                            <comment id="105577" author="jlevi" created="Tue, 3 Feb 2015 19:31:00 +0000"  >&lt;p&gt;Patch to disable test(s):  &lt;a href=&quot;http://review.whamcloud.com/#/c/13600&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/13600&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="105582" author="adilger" created="Tue, 3 Feb 2015 19:42:11 +0000"  >&lt;p&gt;Isaac, we cannot change the seek value in test_80, because that is testing VM cache invalidation specific to the client, so changing the file size will break this test on the client.&lt;/p&gt;

&lt;p&gt;Oleg&apos;s suggestion is to only exclude subtest 80 if the ARC size is too small, so that it still runs on other systems that have enough RAM to pass this subtest. It might be useful to check pass/fail rates against VM clusters to see if some VMs are failing more often because they have less RAM?  It doesn&apos;t seem that we need to exclude subtest 78 or 79 at all, but we&apos;re landing this patch anyway to allow review-ZFS to begin passing. &lt;/p&gt;</comment>
                            <comment id="105587" author="gerrit" created="Tue, 3 Feb 2015 19:48:39 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/13600/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13600/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; osd-zfs: umount hang in sanity 133g&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 298916a319e1c5da4be0df3609e4773fe70a4026&lt;/p&gt;</comment>
                            <comment id="105588" author="jlevi" created="Tue, 3 Feb 2015 19:49:34 +0000"  >&lt;p&gt;Failing tests disabled. Fix will be included in 2.8.&lt;/p&gt;</comment>
                            <comment id="105606" author="isaac" created="Tue, 3 Feb 2015 21:51:01 +0000"  >&lt;p&gt;Andreas, DMU wanted about 1G ARC size for the object delete. Most of our VMs are configured with 1.85G memory so ARC size defaults at 938M. So they&apos;ll all run into this problem. We can increase ARC size a bit to let the object delete pass, because only a couple of MBs were really needed. But that&apos;d likely run into other issues, e.g. &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5809&quot; title=&quot;sanity-benchmark test pios_fpp: OOM on zfs OSS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5809&quot;&gt;&lt;del&gt;LU-5809&lt;/del&gt;&lt;/a&gt;. So I think our best workaround would be to only exclude 80. I&apos;m now testing that on Eagle - will push a patch shortly.&lt;/p&gt;</comment>
                            <comment id="105611" author="gerrit" created="Tue, 3 Feb 2015 22:18:10 +0000"  >&lt;p&gt;Isaac Huang (he.huang@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13620&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13620&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; osd-zfs: umount hang in sanity 133g&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 39acc43a443a23f3e92dab63155ad658fdeb8cf2&lt;/p&gt;</comment>
                            <comment id="105649" author="gerrit" created="Wed, 4 Feb 2015 07:13:06 +0000"  >&lt;p&gt;Isaac Huang (he.huang@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13630&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13630&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; osd-zfs: umount hang in sanity 133g&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 13cc94af090e0b1279c2987840a0e6de685e03ee&lt;/p&gt;</comment>
                            <comment id="105719" author="isaac" created="Wed, 4 Feb 2015 19:56:17 +0000"  >&lt;p&gt;It looked like the &lt;b&gt;preferred&lt;/b&gt; DMU API to remove large/sparse objects is &lt;em&gt;dmu_free_long_range()&lt;/em&gt;. The ZPL was able to remove any large/sparse objects I threw at it where it&apos;d just choke up osd-zfs:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;zfs_rmnode(znode_t *zp)
......
        /*
         * Free up all the data in the file.
         */
        error = dmu_free_long_range(os, zp-&amp;gt;z_id, 0, DMU_OBJECT_END);
......
        /*
         * Set up the final transaction.
         */
        tx = dmu_tx_create(os);
        dmu_tx_hold_free(tx, zp-&amp;gt;z_id, 0, DMU_OBJECT_END);
......
        error = dmu_tx_assign(tx, TXG_WAIT);
......
        zfs_znode_delete(zp, tx); ==&amp;gt; VERIFY(0 == dmu_object_free(os, obj, tx));
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I created a simple patch and my tests showed it worked:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;@@ -444,6 +444,9 @@ static void __osd_declare_object_destroy(const struct lu_env *env,
        zap_cursor_t            *zc;
        int                      rc = 0;
 
+       rc = dmu_free_long_range(osd-&amp;gt;od_os, oid, 0, DMU_OBJECT_END);
+       LASSERTF(rc == 0, &quot;dmu_free_long_range &quot;LPU64&quot; failed: %d\n&quot;, oid, rc);
+
        dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;But two things I&apos;m not sure about:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;I had to call it in do_declare_destroy() because it must be called before dmu_tx_hold_free() is called, that means when do_declare_destroy() returns the data in the object has already been freed. Is it OK for Lustre/OSD?&lt;/li&gt;
	&lt;li&gt;The &lt;em&gt;dmu_free_long_range()&lt;/em&gt; frees data in the object in several TXs, so it might fail in the middle leaving the data in the object partially freed. If do_declare_destroy() fails for this reason, what would OSD layer do? The object itself and the OI and accounting ZAPs are still freed/updated in a single TX so that&apos;s consistent but the object data can be partially freed already.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;We can work on dmu_tx_count_free() to correctly estimate memory overhead for the particular sparse file created by test_80(). But we&apos;d have to deal with any large/sparse object anyway, so I think in the long run it&apos;d be better to go with &lt;em&gt;dmu_free_long_range()&lt;/em&gt;.&lt;/p&gt;</comment>
                            <comment id="105721" author="behlendorf" created="Wed, 4 Feb 2015 19:58:57 +0000"  >&lt;p&gt;Nice find, that neatly explains everything.  Thanks Isaac for opening issue 3064 at zfsonlinux so we can decide what should be done on the ZFS side.  We should certainly investigate improving the quality of the worst case estimate.  But it really does need to be a worst case estimate or we risk potentially stalling the txg engine in the worst (but very unlikely) case.  Anyway, I&apos;m open to ideas!&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://github.com/zfsonlinux/zfs/issues/3064&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/zfsonlinux/zfs/issues/3064&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="105722" author="bzzz" created="Wed, 4 Feb 2015 20:07:03 +0000"  >&lt;p&gt;Isaac, this is usually done using an additional index/list containing object to be freed. so OSD&apos;s destroy put an object on the list (plus, say, OI removal), then a separate thread (or osd_trans_stop()) truncates the object, frees it and remove from the list/index using as many transactions as needed.&lt;/p&gt;</comment>
                            <comment id="105724" author="behlendorf" created="Wed, 4 Feb 2015 20:12:51 +0000"  >&lt;p&gt;Yes, what Alex said.  This is exactly how the Posix layer also works, the object to be removed is placed on an unlinked list for handling later in as many transactions as needed.  Only once the blocks are all freed is the object removed from the list.  See zfs_unlinked_drain().&lt;/p&gt;</comment>
                            <comment id="105750" author="adilger" created="Wed, 4 Feb 2015 22:56:54 +0000"  >&lt;p&gt;Note that by the time that the OST object is being destroyed it has already been unlinked from the MDS namespace, and that unlink is committed on the MDS. The OFD has also revoked client locks and discarded any cached dirty pages on the clients, so there is no danger to destroy the object in stages. That is what ext4 is doing internally (truncating blocks in small chunks from the end of the file).&lt;/p&gt;

&lt;p&gt;Since the MDT is already logging the object destroy locally and will resend it if the OSS crashes. The OST object hasn&apos;t been deleted from the OST namespace yet, so I don&apos;t see a requirement for more infrastructure to handle this at the OSD level. It is fine to truncate the object before destroy FOR THE OST ONLY, and higher layers will handle it.&lt;/p&gt;

&lt;p&gt;That said, since the OSD code is common, we shouldn&apos;t be exposing truncated-but-not-unlined objects on the MDT, so it may be easiest to truncate to zero explicitly from the OFD before the object destroy?&lt;/p&gt;</comment>
                            <comment id="105768" author="bzzz" created="Thu, 5 Feb 2015 03:21:16 +0000"  >&lt;p&gt;well, that would mean we can&apos;t atomically destroy huge objects using OUT which I&apos;d like to do to batch OST object destroy&apos;s from MDT.&lt;/p&gt;</comment>
                            <comment id="106415" author="isaac" created="Tue, 10 Feb 2015 07:09:37 +0000"  >&lt;p&gt;How about:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;An object (maybe a ZAP) is created per OST/MDT, to hold objects to be freed. Let&apos;s call it the deathrow object.&lt;/li&gt;
	&lt;li&gt;In declare_object_destroy() and object_destroy(), within a single TX, move the object to the deathrow, and update OI and accounting ZAPs as well. So the object disappears from OST/MDT namespace atomically.
	&lt;ul&gt;
		&lt;li&gt;If an object is small enough, it can be destroyed in the old way.&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;A separate thread works on the deathrow object, truncating and freeing every object in it.
	&lt;ul&gt;
		&lt;li&gt;This may break some tests that wait for free space to increase after object removal. We may also do at least the truncating in osd_trans_stop().&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;All implemented at the osd-zfs layer.&lt;/p&gt;</comment>
                            <comment id="106416" author="bzzz" created="Tue, 10 Feb 2015 07:21:31 +0000"  >&lt;p&gt;from Lustre point of view, the object is destroyed when it can&apos;t be found with lu_object_find(). IOW, when corresponding FID is removed from OI - this should be a part of object destroy. space accounting shouldn&apos;t be an issue as it&apos;s not released immediately in any case: grants/quota can release reserved space upon commit, but a new reserve can be made only with new statfs. truncate/dnode destroy in osd_trans_stop() looks OK.&lt;/p&gt;</comment>
                            <comment id="106607" author="adilger" created="Wed, 11 Feb 2015 10:07:01 +0000"  >&lt;p&gt;It makes sense to keep this compatible with ZFS ZPL if at all possible, so that if large files are unlinked under ZPL and then mounted as Lustre, or vice versa, we don&apos;t defer deleting them forever.  I see this is handled in &lt;tt&gt;zfs_unlinked_add()&lt;/tt&gt; and &lt;tt&gt;zfs_unlinked_drain()&lt;/tt&gt; with a ZAP named &quot;&lt;tt&gt;DELETE_QUEUE&lt;/tt&gt;&quot; (&lt;tt&gt;ZFS_UNLINKED_SET&lt;/tt&gt;) in the MASTER_NODE_OBJ.  Even if we need to implement our own routines to handle this, it makes sense to use the same ZAP and zap format (&lt;tt&gt;zap_add_int()&lt;/tt&gt;).&lt;/p&gt;</comment>
                            <comment id="106662" author="behlendorf" created="Wed, 11 Feb 2015 18:03:47 +0000"  >&lt;p&gt;I like Andreas&apos;s idea of keeping the unlink behavior compatible with the ZPL.  It would be ideal if you could reuse the existing ZPL functions but those functions are tied quite closely to ZPL specific data structures so that&apos;s probably not workable.  But the ZFS_UNLINKED_SET object itself is just a ZAP containing a list of object ids.  And since objects on disk are already constructed to be compatible with the ZPL we should be able to safely use it.  Isaac&apos;s design is nice, but let me suggest a few minor tweaks:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Use the existing ZFS_UNLINKED_SET object linked the MASTER_NODE_OBJ as the deathrow object.&lt;/li&gt;
	&lt;li&gt;In declare_object_destroy() and object_destroy() just handle moving the object to the ZFS_UNLINKED_SET in a single TX.&lt;/li&gt;
	&lt;li&gt;In a dedicated thread, taskq, or generic linux worker thread regularly walk the ZFS_UNLINKED_SET and rely on dmu_free_long_range() to split the free over as many TXGs as required.&lt;/li&gt;
	&lt;li&gt;I don&apos;t think there&apos;s any advantage in handling small object destruction synchronously in object_destroy().  It&apos;s simpler and probably more efficient to always do this asynchronously.&lt;/li&gt;
	&lt;li&gt;Start draining the ZFS_UNLINKED_SET right away when remounting the OSD (this happening during mount for the ZPL).&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="106671" author="bzzz" created="Wed, 11 Feb 2015 18:39:38 +0000"  >&lt;p&gt;I&apos;d think that the optimization for small objects (notice most of MDT&apos;s objects are literally empty) makes sense as we don&apos;t need to modify yet another ZAP twice. I guess there is no strong requirement to implement this right away, but still.&lt;/p&gt;</comment>
                            <comment id="106678" author="behlendorf" created="Wed, 11 Feb 2015 19:25:09 +0000"  >&lt;p&gt;I could see a case for zero length files as an possible optimization.  But I suspect that even for the MDT it would be more efficient to handle the freeing asynchronously outside of any request processing.  Even if the file is zero length you&apos;re still going to be freeing a spill block for the xattrs and updating a dbuf for the dnode object.  Personally I&apos;d keep it as simple and concise as possible until it&apos;s clear something more is required.  But that&apos;s just my preference.&lt;/p&gt;

&lt;p&gt;Keep in mind that none of this free space will be available for a couple TXGs anyway.&lt;/p&gt;

</comment>
                            <comment id="106682" author="bzzz" created="Wed, 11 Feb 2015 19:43:35 +0000"  >&lt;p&gt;I have no objection to do this as simple as possible. my point was that MDT is known to be CPU-bound and ZAP (even micro ZAP) isn&apos;t free.&lt;/p&gt;</comment>
                            <comment id="106733" author="isaac" created="Thu, 12 Feb 2015 00:50:06 +0000"  >&lt;p&gt;Thanks all. I&apos;ll work on a patch first without the small object optimization to get this bug fixed; then will benchmark to figure out whether to optimize the small object path or not.&lt;/p&gt;</comment>
                            <comment id="106752" author="adilger" created="Thu, 12 Feb 2015 02:28:09 +0000"  >&lt;p&gt;I would agree with Alex on this. By deferring unlink of small files it will probably double or triple  the total IO that the MDT is doing because in addition to the actual dnode deletion it also needs to insert the dnode into the deathrow ZAP in one TXG and then delete it from the same ZAP in a different txg. If there are a large number of objects being deleted at once (easily possible on the MDT), then the deathrow ZAP may get quite large (and never shrink) and updates would become less efficient than if it is kept small. &lt;/p&gt;</comment>
                            <comment id="107358" author="bogl" created="Thu, 19 Feb 2015 15:56:10 +0000"  >&lt;p&gt;another seen on b2_5 with zfs:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/435c3152-b816-11e4-9ecb-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/435c3152-b816-11e4-9ecb-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="107377" author="gerrit" created="Thu, 19 Feb 2015 18:29:15 +0000"  >&lt;p&gt;Nathaniel Clark (nathaniel.l.clark@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13805&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13805&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; osd-zfs: umount hang in sanity 133g&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_5&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 817cf8a2e781d546508929a9f58b44561ae3361c&lt;/p&gt;</comment>
                            <comment id="115609" author="gerrit" created="Sun, 17 May 2015 22:46:54 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/13630/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13630/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5242&quot; title=&quot;Test hang sanity test_132, test_133: umount ost&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5242&quot;&gt;&lt;del&gt;LU-5242&lt;/del&gt;&lt;/a&gt; osd-zfs: umount hang in sanity 133g&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 9b704e4088d867851cdb011f0a2560b1e622555c&lt;/p&gt;</comment>
                            <comment id="115660" author="pjones" created="Mon, 18 May 2015 14:20:29 +0000"  >&lt;p&gt;Landed for 2.8&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="25067">LU-5163</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="28117">LU-6105</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="23474">LU-4716</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="24432">LU-4968</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="20090">LU-3665</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="28077">LU-6089</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="28362">LU-6155</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="25386">LU-5277</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="26991">LU-5737</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="27844">LU-6008</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="28495">LU-6195</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="24369">LU-4950</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="31526">LU-7020</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="12251">LU-2160</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="16726" name="zpool-events.txt" size="463587" author="utopiabound" created="Wed, 21 Jan 2015 19:53:21 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwpon:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14622</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>