<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:29:04 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2887] sanity-quota test_12a: slow due to ZFS VMs sharing single disk</title>
                <link>https://jira.whamcloud.com/browse/LU-2887</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Nathaniel Clark &amp;lt;nathaniel.l.clark@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/c3a0b364-812d-11e2-b609-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/c3a0b364-812d-11e2-b609-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_12a failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: sanity-quota 12a&lt;/p&gt;

&lt;p&gt;Looking through test 12a, things seem to have hung up on the runas dd (with oflag=sync) at the end of the test.&lt;/p&gt;

&lt;p&gt;OST has threads that are blocked on disk I/O (oss dmesg):&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;txg_sync      D 0000000000000000     0 24236      2 0x00000080
 ffff88005027dbc0 0000000000000046 ffff88004b906ec0 0000000000000086
 ffff88005027db70 ffff88007c7d4408 0000000000000001 ffff88007c7d4420
 ffff880052101058 ffff88005027dfd8 000000000000fb88 ffff880052101058
Call Trace:
 [&amp;lt;ffffffff81090b9e&amp;gt;] ? prepare_to_wait_exclusive+0x4e/0x80
 [&amp;lt;ffffffffa016b5ac&amp;gt;] cv_wait_common+0x9c/0x1a0 [spl]
 [&amp;lt;ffffffffa02d5160&amp;gt;] ? zio_execute+0x0/0xf0 [zfs]
 [&amp;lt;ffffffff81090990&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa016b6e3&amp;gt;] __cv_wait+0x13/0x20 [spl]
 [&amp;lt;ffffffffa02d533b&amp;gt;] zio_wait+0xeb/0x160 [zfs]
 [&amp;lt;ffffffffa026b807&amp;gt;] dsl_pool_sync+0x2a7/0x480 [zfs]
 [&amp;lt;ffffffffa027e147&amp;gt;] spa_sync+0x397/0x9a0 [zfs]
 [&amp;lt;ffffffffa028fd41&amp;gt;] txg_sync_thread+0x2c1/0x490 [zfs]
 [&amp;lt;ffffffff810527f9&amp;gt;] ? set_user_nice+0xc9/0x130
 [&amp;lt;ffffffffa028fa80&amp;gt;] ? txg_sync_thread+0x0/0x490 [zfs]
 [&amp;lt;ffffffffa0164668&amp;gt;] thread_generic_wrapper+0x68/0x80 [spl]
 [&amp;lt;ffffffffa0164600&amp;gt;] ? thread_generic_wrapper+0x0/0x80 [spl]
 [&amp;lt;ffffffff81090626&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff81090590&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
ll_ost_io00_0 D 0000000000000000     0 18170      2 0x00000080
 ffff8800427b9820 0000000000000046 0000000000000046 0000000000000001
 ffff8800427b98b0 0000000000000086 ffff8800427b97e0 ffff88005027dd60
 ffff8800427b7ab8 ffff8800427b9fd8 000000000000fb88 ffff8800427b7ab8
Call Trace:
 [&amp;lt;ffffffff81090b9e&amp;gt;] ? prepare_to_wait_exclusive+0x4e/0x80
 [&amp;lt;ffffffffa016b5ac&amp;gt;] cv_wait_common+0x9c/0x1a0 [spl]
 [&amp;lt;ffffffff81090990&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa016b6e3&amp;gt;] __cv_wait+0x13/0x20 [spl]
 [&amp;lt;ffffffffa028f573&amp;gt;] txg_wait_synced+0xb3/0x190 [zfs]
 [&amp;lt;ffffffffa0c71015&amp;gt;] osd_trans_stop+0x365/0x420 [osd_zfs]
 [&amp;lt;ffffffffa0cb9062&amp;gt;] ofd_trans_stop+0x22/0x60 [ofd]
 [&amp;lt;ffffffffa0cbdf06&amp;gt;] ofd_commitrw_write+0x406/0x11b0 [ofd]
 [&amp;lt;ffffffffa0cbf13d&amp;gt;] ofd_commitrw+0x48d/0x920 [ofd]
 [&amp;lt;ffffffffa085b708&amp;gt;] obd_commitrw+0x128/0x3d0 [ost]
 [&amp;lt;ffffffffa0862599&amp;gt;] ost_brw_write+0xe49/0x14d0 [ost]
 [&amp;lt;ffffffff812739b6&amp;gt;] ? vsnprintf+0x2b6/0x5f0
 [&amp;lt;ffffffffa088c1f0&amp;gt;] ? target_bulk_timeout+0x0/0xc0 [ptlrpc]
 [&amp;lt;ffffffffa08680e3&amp;gt;] ost_handle+0x31e3/0x46f0 [ost]
 [&amp;lt;ffffffffa05ca154&amp;gt;] ? libcfs_id2str+0x74/0xb0 [libcfs]
 [&amp;lt;ffffffffa08dc02c&amp;gt;] ptlrpc_server_handle_request+0x41c/0xdf0 [ptlrpc]
 [&amp;lt;ffffffffa05be5de&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
 [&amp;lt;ffffffffa08d3759&amp;gt;] ? ptlrpc_wait_event+0xa9/0x290 [ptlrpc]
 [&amp;lt;ffffffff81052223&amp;gt;] ? __wake_up+0x53/0x70
 [&amp;lt;ffffffffa08dd576&amp;gt;] ptlrpc_main+0xb76/0x1870 [ptlrpc]
 [&amp;lt;ffffffffa08dca00&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
 [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffffa08dca00&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
 [&amp;lt;ffffffffa08dca00&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
 [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="17731">LU-2887</key>
            <summary>sanity-quota test_12a: slow due to ZFS VMs sharing single disk</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>MB</label>
                            <label>performance</label>
                            <label>zfs</label>
                    </labels>
                <created>Wed, 27 Feb 2013 21:34:34 +0000</created>
                <updated>Mon, 29 May 2017 06:03:58 +0000</updated>
                            <resolved>Sat, 3 Dec 2016 01:25:49 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                    <version>Lustre 2.4.1</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                    <fixVersion>Lustre 2.4.2</fixVersion>
                    <fixVersion>Lustre 2.5.1</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>14</watches>
                                                                            <comments>
                            <comment id="53147" author="bzzz" created="Wed, 27 Feb 2013 21:40:08 +0000"  >&lt;p&gt;i&apos;ve been this in the local testing as well.&lt;/p&gt;</comment>
                            <comment id="53148" author="utopiabound" created="Wed, 27 Feb 2013 21:48:48 +0000"  >&lt;p&gt;Compairing results with ldiskfs via maloo:&lt;/p&gt;

&lt;p&gt;Some zfs runs complete in ~4K seconds (possibly SLOW=yes).&lt;br/&gt;
ldiskfs runs complete in ~300 to 900 seconds.&lt;/p&gt;

&lt;p&gt;A possible fix is to mark this as SLOW-only, or as SLOW just for zfs?&lt;/p&gt;</comment>
                            <comment id="53149" author="niu" created="Wed, 27 Feb 2013 21:48:55 +0000"  >&lt;p&gt;Will you work on this, Alex? Seems it&apos;s a generic zfs sync problem?&lt;/p&gt;</comment>
                            <comment id="53150" author="bzzz" created="Wed, 27 Feb 2013 21:52:07 +0000"  >&lt;p&gt;sorry, not anytime soon&lt;/p&gt;</comment>
                            <comment id="53402" author="utopiabound" created="Tue, 5 Mar 2013 20:07:41 +0000"  >&lt;p&gt;Tag should be &quot;sometimes_except&quot; since it&apos;s now a SLOW test for zfs&lt;/p&gt;</comment>
                            <comment id="53624" author="bfaccini" created="Fri, 8 Mar 2013 15:18:31 +0000"  >&lt;p&gt;Alex, do you think we can learn something with some profiling ??&lt;/p&gt;</comment>
                            <comment id="53697" author="bfaccini" created="Mon, 11 Mar 2013 12:13:38 +0000"  >&lt;p&gt;Whole logs analysis does not indicate threads (at least ll_ost_io00_0 with pid 18170, which could be traced in Lustre debug-log) were hung. So we may only face a very slow test execution due to some external cause, the perfs were horribly slow :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;09:41:50: [dd] [if=/dev/zero] [bs=1M] [of=/mnt/lustre/d0.sanity-quota/d12/f.sanity-quota.12a-0] [count=17] [oflag=sync]
10:18:20:17+0 records in
10:18:23:17+0 records out
10:18:24:17825792 bytes (18 MB) copied, 2288.82 s, 7.8 kB/s
10:18:24:Write to ost1...
10:18:25:running as uid/gid/euid/egid 60000/60000/60000/60000, groups:
10:18:26: [dd] [if=/dev/zero] [bs=1M] [of=/mnt/lustre/d0.sanity-quota/d12/f.sanity-quota.12a-1] [count=17] [oflag=sync]
10:26:34:dd: writing `/mnt/lustre/d0.sanity-quota/d12/f.sanity-quota.12a-1&apos;: Disk quota exceeded
10:26:37:5+0 records in
10:26:38:4+0 records out
10:26:39:4194304 bytes (4.2 MB) copied, 490.729 s, 8.5 kB/s
10:26:39:Free space from ost0...
10:26:40:CMD: wtm-24vm7 lctl set_param -n osd*.*MD*.force_sync 1
10:26:42:CMD: wtm-24vm7 lctl get_param -n osc.*MDT*.sync_*
10:26:43:CMD: wtm-24vm7 lctl get_param -n osc.*MDT*.sync_*
10:26:44:CMD: wtm-24vm7 lctl get_param -n osc.*MDT*.sync_*
10:26:45:CMD: wtm-24vm7 lctl get_param -n osc.*MDT*.sync_*
10:26:47:Waiting for local destroys to complete
10:26:51:CMD: wtm-24vm8 lctl set_param -n osd*.*OS*.force_sync 1
10:26:52:Write to ost1 after space freed from ost0...
10:26:54:running as uid/gid/euid/egid 60000/60000/60000/60000, groups:
10:26:55: [dd] [if=/dev/zero] [bs=1M] [of=/mnt/lustre/d0.sanity-quota/d12/f.sanity-quota.12a-1] [count=17] [oflag=sync]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt; 

&lt;p&gt;BTW, my own runs of test_12a on zfs show much better performance/stats.&lt;/p&gt;

&lt;p&gt;Nathaniel, did you hit this problem/time-out multiple times? Also, you have already set test_12a as ZFS_SLOW in change 5553 for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2874&quot; title=&quot;Test timeout failure on test suite replay-ost-single test_8a: timeout on wait for dd&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2874&quot;&gt;&lt;del&gt;LU-2874&lt;/del&gt;&lt;/a&gt;!&lt;/p&gt;

&lt;p&gt;I will try to re-run the same test suite for your original &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2829&quot; title=&quot;Timeout on sanityn test_33a: zfs slow when commit_on_sharing enabled&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2829&quot;&gt;&lt;del&gt;LU-2829&lt;/del&gt;&lt;/a&gt;/change 5530/patch #2/build 13602 where these &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/c3a0b364-812d-11e2-b609-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/c3a0b364-812d-11e2-b609-52540035b04c&lt;/a&gt; tests failed.&lt;/p&gt;</comment>
                            <comment id="53701" author="bfaccini" created="Mon, 11 Mar 2013 12:37:14 +0000"  >&lt;p&gt;Humm too bad/late, Build 13602 has been removed ...&lt;/p&gt;

&lt;p&gt;So I&apos;m back to trying to reproduce the very slow runs of test_12a.&lt;/p&gt;</comment>
                            <comment id="53790" author="utopiabound" created="Tue, 12 Mar 2013 10:29:49 +0000"  >&lt;p&gt;Most recent failure &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/bb4b4890-8895-11e2-b643-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/bb4b4890-8895-11e2-b643-52540035b04c&lt;/a&gt;&lt;/p&gt;
&lt;blockquote&gt;&lt;p&gt;Failure Rate: 7.00% of last 100 executions &amp;#91;all branches&amp;#93;&lt;/p&gt;&lt;/blockquote&gt;</comment>
                            <comment id="53875" author="bzzz" created="Wed, 13 Mar 2013 05:17:19 +0000"  >&lt;p&gt;it might make sense to add some stats to osd-zfs/ so we know for sure how long txg_wait_synced() takes ?&lt;/p&gt;</comment>
                            <comment id="54138" author="jlevi" created="Fri, 15 Mar 2013 16:28:45 +0000"  >&lt;p&gt;Lowering priority per discussion with Oleg.&lt;/p&gt;</comment>
                            <comment id="64605" author="yujian" created="Tue, 20 Aug 2013 14:10:02 +0000"  >&lt;p&gt;Lustre Build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b2_4/33/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-b2_4/33/&lt;/a&gt;&lt;br/&gt;
FSTYPE=zfs&lt;/p&gt;

&lt;p&gt;sanity-quota test_7a hung as follows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;running as uid/gid/euid/egid 60000/60000/60000/60000, groups:
 [dd] [if=/dev/zero] [bs=1M] [of=/mnt/lustre/d0.sanity-quota/d7/f.sanity-quota.7a] [count=21] [oflag=sync]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Syslog on OSS:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Aug 18 23:22:00 wtm-15vm4 kernel: txg_sync      D 0000000000000000     0 15003      2 0x00000080
Aug 18 23:22:00 wtm-15vm4 kernel: ffff880052733ba0 0000000000000046 ffff8800ffffffff 0000333ae2e38992
Aug 18 23:22:00 wtm-15vm4 kernel: 00000000a0529170 ffff88006a5fce50 0000000000386bda ffffffffadee9654
Aug 18 23:22:00 wtm-15vm4 kernel: ffff8800523f3ab8 ffff880052733fd8 000000000000fb88 ffff8800523f3ab8
Aug 18 23:22:00 wtm-15vm4 kernel: Call Trace:
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffff810a2351&amp;gt;] ? ktime_get_ts+0xb1/0xf0
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffff8150e913&amp;gt;] io_schedule+0x73/0xc0
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e6d4c&amp;gt;] cv_wait_common+0x8c/0x100 [spl]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffff81096cc0&amp;gt;] ? autoremove_wake_function+0x0/0x40
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e3717&amp;gt;] ? taskq_dispatch_ent+0x57/0x110 [spl]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e6dd8&amp;gt;] __cv_wait_io+0x18/0x20 [spl]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa052939b&amp;gt;] zio_wait+0xfb/0x190 [zfs]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa04c2135&amp;gt;] dsl_pool_sync+0x2f5/0x540 [zfs]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa04da82e&amp;gt;] spa_sync+0x39e/0x970 [zfs]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffff8103b8d9&amp;gt;] ? kvm_clock_get_cycles+0x9/0x10
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa04e582a&amp;gt;] txg_sync_thread+0x27a/0x4b0 [zfs]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffff810560a9&amp;gt;] ? set_user_nice+0xc9/0x130
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa04e55b0&amp;gt;] ? txg_sync_thread+0x0/0x4b0 [zfs]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e2a3f&amp;gt;] thread_generic_wrapper+0x5f/0x70 [spl]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e29e0&amp;gt;] ? thread_generic_wrapper+0x0/0x70 [spl]
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffff81096956&amp;gt;] kthread+0x96/0xa0
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffff810968c0&amp;gt;] ? kthread+0x0/0xa0
Aug 18 23:22:00 wtm-15vm4 kernel: [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo report: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/760750e4-095f-11e3-ad8a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/760750e4-095f-11e3-ad8a-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="64608" author="yujian" created="Tue, 20 Aug 2013 14:50:28 +0000"  >&lt;p&gt;Lustre Build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b2_4/33/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-b2_4/33/&lt;/a&gt;&lt;br/&gt;
FSTYPE=zfs&lt;/p&gt;

&lt;p&gt;replay-ost-single test_5 hung, and syslog on OSS showed that:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Aug 18 17:17:57 wtm-15vm4 kernel: txg_sync      D 0000000000000000     0 29194      2 0x00000080
Aug 18 17:17:57 wtm-15vm4 kernel: ffff8800683c5ba0 0000000000000046 ffff8800ffffffff 00004af162da63c4
Aug 18 17:17:57 wtm-15vm4 kernel: 000000004a452700 ffff88004515a750 000000000070a032 ffffffffadef130d
Aug 18 17:17:57 wtm-15vm4 kernel: ffff88006c9c1058 ffff8800683c5fd8 000000000000fb88 ffff88006c9c1058
Aug 18 17:17:57 wtm-15vm4 kernel: Call Trace:
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffff810a2351&amp;gt;] ? ktime_get_ts+0xb1/0xf0
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffff8150e913&amp;gt;] io_schedule+0x73/0xc0
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e6d4c&amp;gt;] cv_wait_common+0x8c/0x100 [spl]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffff81096cc0&amp;gt;] ? autoremove_wake_function+0x0/0x40
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e3717&amp;gt;] ? taskq_dispatch_ent+0x57/0x110 [spl]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e6dd8&amp;gt;] __cv_wait_io+0x18/0x20 [spl]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa052939b&amp;gt;] zio_wait+0xfb/0x190 [zfs]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa04c1f2c&amp;gt;] dsl_pool_sync+0xec/0x540 [zfs]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa04da82e&amp;gt;] spa_sync+0x39e/0x970 [zfs]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffff8103b8d9&amp;gt;] ? kvm_clock_get_cycles+0x9/0x10
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa04e582a&amp;gt;] txg_sync_thread+0x27a/0x4b0 [zfs]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffff810560a9&amp;gt;] ? set_user_nice+0xc9/0x130
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa04e55b0&amp;gt;] ? txg_sync_thread+0x0/0x4b0 [zfs]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e2a3f&amp;gt;] thread_generic_wrapper+0x5f/0x70 [spl]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffffa03e29e0&amp;gt;] ? thread_generic_wrapper+0x0/0x70 [spl]
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffff81096956&amp;gt;] kthread+0x96/0xa0
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffff810968c0&amp;gt;] ? kthread+0x0/0xa0
Aug 18 17:17:57 wtm-15vm4 kernel: [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo report: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/15fe8b0a-095e-11e3-ad8a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/15fe8b0a-095e-11e3-ad8a-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The following sub-tests also hung at &quot;txg_sync&quot;:&lt;br/&gt;
large-scale test_3a: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/9ed19422-0963-11e3-ad8a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/9ed19422-0963-11e3-ad8a-52540035b04c&lt;/a&gt;&lt;br/&gt;
obdfilter-survey test_1a: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/653914a0-0964-11e3-ad8a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/653914a0-0964-11e3-ad8a-52540035b04c&lt;/a&gt;&lt;br/&gt;
parallel-scale test_compilebench: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/18f7c150-0963-11e3-ad8a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/18f7c150-0963-11e3-ad8a-52540035b04c&lt;/a&gt;&lt;br/&gt;
parallel-scale-nfsv3 test_compilebench: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/bd745a9e-0964-11e3-ad8a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/bd745a9e-0964-11e3-ad8a-52540035b04c&lt;/a&gt;&lt;br/&gt;
parallel-scale-nfsv4 test_compilebench: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/17b573b2-0965-11e3-ad8a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/17b573b2-0965-11e3-ad8a-52540035b04c&lt;/a&gt;&lt;br/&gt;
posix test_1: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/73bee652-0965-11e3-ad8a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/73bee652-0965-11e3-ad8a-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The issue in this ticket is blocking the zfs test session on Lustre b2_4 branch:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sessions/0343b17e-095a-11e3-ad8a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sessions/0343b17e-095a-11e3-ad8a-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="65842" author="yujian" created="Thu, 5 Sep 2013 14:43:11 +0000"  >&lt;p&gt;Lustre build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b2_4/44/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-b2_4/44/&lt;/a&gt; (2.4.1 RC1)&lt;br/&gt;
Distro/Arch: RHEL6.4/x86_64&lt;br/&gt;
FSTYPE=zfs&lt;/p&gt;

&lt;p&gt;sanity-quota test 12a hit the same failure:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/15cc22b2-1559-11e3-8938-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/15cc22b2-1559-11e3-8938-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;parallel-scale test simul: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/4e4fc084-155a-11e3-8938-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/4e4fc084-155a-11e3-8938-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="66814" author="bzzz" created="Tue, 17 Sep 2013 05:22:28 +0000"  >&lt;p&gt;is there a way to generate crash dump for the case? I&apos;m still scratching my head around the issue. talked to Brian B., but he doesn&apos;t see the root cause as well.&lt;/p&gt;</comment>
                            <comment id="66894" author="yujian" created="Wed, 18 Sep 2013 05:28:56 +0000"  >&lt;p&gt;Lustre build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b2_4/45/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-b2_4/45/&lt;/a&gt; (2.4.1 RC2)&lt;br/&gt;
Distro/Arch: RHEL6.4/x86_64&lt;br/&gt;
FSTYPE=zfs&lt;/p&gt;

&lt;p&gt;MDSCOUNT=1&lt;br/&gt;
OSTCOUNT=7&lt;br/&gt;
MDSSIZE=2097152&lt;br/&gt;
OSTSIZE=2097152&lt;/p&gt;

&lt;p&gt;In an &lt;b&gt;autotest&lt;/b&gt; run, parallel-scale test simul timed out in &lt;b&gt;3600s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/7a8c3714-18ab-11e3-aa54-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/7a8c3714-18ab-11e3-aa54-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Dmesg on OSS node showed that:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;txg_sync      D 0000000000000000     0 30848      2 0x00000080
 ffff880038d8bba0 0000000000000046 ffff8800ffffffff 0000c4392b16901e
 000000001a86df70 ffff88006f691f30 00000000013837fa ffffffffadd58c90
 ffff88002319d098 ffff880038d8bfd8 000000000000fb88 ffff88002319d098
Call Trace:
 [&amp;lt;ffffffff810a2431&amp;gt;] ? ktime_get_ts+0xb1/0xf0
 [&amp;lt;ffffffff8150ed03&amp;gt;] io_schedule+0x73/0xc0
 [&amp;lt;ffffffffa03e6d4c&amp;gt;] cv_wait_common+0x8c/0x100 [spl]
 [&amp;lt;ffffffff81096da0&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa03e3717&amp;gt;] ? taskq_dispatch_ent+0x57/0x110 [spl]
 [&amp;lt;ffffffffa03e6dd8&amp;gt;] __cv_wait_io+0x18/0x20 [spl]
 [&amp;lt;ffffffffa052939b&amp;gt;] zio_wait+0xfb/0x190 [zfs]
 [&amp;lt;ffffffffa04c1f2c&amp;gt;] dsl_pool_sync+0xec/0x540 [zfs]
 [&amp;lt;ffffffffa04da82e&amp;gt;] spa_sync+0x39e/0x970 [zfs]
 [&amp;lt;ffffffff8103b8d9&amp;gt;] ? kvm_clock_get_cycles+0x9/0x10
 [&amp;lt;ffffffffa04e582a&amp;gt;] txg_sync_thread+0x27a/0x4b0 [zfs]
 [&amp;lt;ffffffff810560a9&amp;gt;] ? set_user_nice+0xc9/0x130
 [&amp;lt;ffffffffa04e55b0&amp;gt;] ? txg_sync_thread+0x0/0x4b0 [zfs]
 [&amp;lt;ffffffffa03e2a3f&amp;gt;] thread_generic_wrapper+0x5f/0x70 [spl]
 [&amp;lt;ffffffffa03e29e0&amp;gt;] ? thread_generic_wrapper+0x0/0x70 [spl]
 [&amp;lt;ffffffff81096a36&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff810969a0&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;In a &lt;b&gt;manual&lt;/b&gt; test run, parallel-scale test simul passed in &lt;b&gt;8727s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/aacb3504-1a8d-11e3-8fec-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/aacb3504-1a8d-11e3-8fec-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;In the above manual test report, we can see that although all of the sub-tests passed, many of them took a very long time to finish:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;simul                   8727s
connectathon            6880s
iorssf                  5570s
iorfpp                  5960s
write_append_truncate   13940s
write_disjoint          29272s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I&apos;ll set up kdump on the test node and run the test manually again to try to get the vmcore dump file.&lt;/p&gt;
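&lt;p&gt;(For reference, a minimal sketch of forcing a vmcore once kdump is configured; this assumes the magic-sysrq interface is available and the kdump kernel is already loaded, so treat it as an untested outline:)&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# enable magic-sysrq, then trigger a crash so kdump captures a vmcore
echo 1 &gt; /proc/sys/kernel/sysrq
echo c &gt; /proc/sysrq-trigger   # node panics and reboots; vmcore is saved under /var/crash by default
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>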
                            <comment id="66898" author="yujian" created="Wed, 18 Sep 2013 09:06:03 +0000"  >&lt;p&gt;Is this ticket related to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2476&quot; title=&quot;poor OST file creation rate performance with zfs backend&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2476&quot;&gt;&lt;del&gt;LU-2476&lt;/del&gt;&lt;/a&gt;/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2600&quot; title=&quot;lustre metadata performance is very slow on zfs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2600&quot;&gt;&lt;del&gt;LU-2600&lt;/del&gt;&lt;/a&gt; which is about the performance issue on ZFS?&lt;/p&gt;</comment>
                            <comment id="67200" author="yujian" created="Sun, 22 Sep 2013 08:13:04 +0000"  >&lt;p&gt;Lustre build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b2_4/45/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-b2_4/45/&lt;/a&gt; (2.4.1 RC2)&lt;br/&gt;
Distro/Arch: RHEL6.4/x86_64&lt;br/&gt;
FSTYPE=zfs&lt;/p&gt;

&lt;p&gt;MDSCOUNT=1&lt;br/&gt;
OSTCOUNT=7&lt;br/&gt;
MDSSIZE=2097152&lt;br/&gt;
OSTSIZE=2097152&lt;/p&gt;

&lt;p&gt;In &lt;b&gt;manual&lt;/b&gt; test runs, sanity-quota did not hang; it finally passed in &lt;b&gt;27153s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/529eda72-21ac-11e3-8292-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/529eda72-21ac-11e3-8292-52540035b04c&lt;/a&gt;&lt;br/&gt;
Among the sub-tests, tests 2 and 12a took a very long time to finish:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;test_2        16566s
test_12a      3196s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="67218" author="bzzz" created="Mon, 23 Sep 2013 08:40:54 +0000"  >&lt;p&gt;could some one run some single test (like simul) with ofd.*.sync_on_lock_cancel set to never please?&lt;/p&gt;</comment>
                            <comment id="67219" author="bzzz" created="Mon, 23 Sep 2013 08:41:52 +0000"  >&lt;p&gt;I did submit &lt;a href=&quot;http://review.whamcloud.com/#/c/7711/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/7711/&lt;/a&gt; few days ago, but Jira hasn&apos;t started even ..&lt;/p&gt;</comment>
                            <comment id="67220" author="yujian" created="Mon, 23 Sep 2013 08:55:33 +0000"  >&lt;blockquote&gt;&lt;p&gt;could some one run some single test (like simul) with ofd.*.sync_on_lock_cancel set to never please?&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Patch for Lustre b2_4 branch is in &lt;a href=&quot;http://review.whamcloud.com/7725&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7725&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;A manual test run is ongoing on wtm-&amp;#91;82-85&amp;#93;, which are the test nodes where the parallel-scale test was performed against Lustre 2.4.1 RC2 previously. We&apos;ll compare the test results.&lt;/p&gt;</comment>
                            <comment id="67323" author="yujian" created="Tue, 24 Sep 2013 06:39:19 +0000"  >&lt;p&gt;Lustre build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b2_4/45/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-b2_4/45/&lt;/a&gt; (2.4.1 RC2)&lt;br/&gt;
Distro/Arch: RHEL6.4/x86_64&lt;br/&gt;
FSTYPE=zfs&lt;/p&gt;

&lt;p&gt;MDSCOUNT=1&lt;br/&gt;
OSTCOUNT=7&lt;br/&gt;
MDSSIZE=2097152&lt;br/&gt;
OSTSIZE=2097152&lt;/p&gt;

&lt;p&gt;Without the patch, parallel-scale test simul passed in &lt;b&gt;8727s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/aacb3504-1a8d-11e3-8fec-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/aacb3504-1a8d-11e3-8fec-52540035b04c&lt;/a&gt;&lt;br/&gt;
With the patch, parallel-scale test simul passed in &lt;b&gt;189s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/a0eb119e-25af-11e3-aeff-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/a0eb119e-25af-11e3-aeff-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="67329" author="bzzz" created="Tue, 24 Sep 2013 09:13:25 +0000"  >&lt;p&gt;ahaha&lt;/p&gt;</comment>
                            <comment id="67515" author="bzzz" created="Wed, 25 Sep 2013 11:14:50 +0000"  >&lt;p&gt;Jian, can you tell which devices were used on OST in that 189s run? thanks.&lt;/p&gt;</comment>
                            <comment id="67516" author="bzzz" created="Wed, 25 Sep 2013 12:11:06 +0000"  >&lt;p&gt;yet another question: if we&apos;re using raw devices for ZFS pools (not /dev/loop*), then what I/O scheduler is used?&lt;/p&gt;</comment>
                            <comment id="67525" author="yujian" created="Wed, 25 Sep 2013 13:08:12 +0000"  >&lt;p&gt;Hi Alex,&lt;/p&gt;

&lt;p&gt;The devices are SCSI disk partitions.&lt;/p&gt;

&lt;p&gt;For I/O scheduler:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat /sys/block/sdb/queue/scheduler
noop anticipatory [deadline] cfq
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="67526" author="bzzz" created="Wed, 25 Sep 2013 13:09:42 +0000"  >&lt;p&gt;thanks. no luck then..&lt;/p&gt;</comment>
                            <comment id="67548" author="bzzz" created="Wed, 25 Sep 2013 15:43:46 +0000"  >&lt;p&gt;Jian, would you mind to try with noop please? (on the clean master branch)&lt;/p&gt;</comment>
                            <comment id="67639" author="yujian" created="Thu, 26 Sep 2013 02:19:05 +0000"  >&lt;p&gt;Lustre build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-master/1689/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-master/1689/&lt;/a&gt;&lt;br/&gt;
Distro/Arch: RHEL6.4/x86_64&lt;br/&gt;
FSTYPE=zfs&lt;br/&gt;
Test nodes: wtm-&amp;#91;82-85&amp;#93;&lt;/p&gt;

&lt;p&gt;With the default &lt;b&gt;&quot;deadline&quot;&lt;/b&gt; I/O scheduler:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# pdsh -l root -S -w wtm-[84,85] &quot;cat /sys/block/sdb/queue/scheduler&quot;
wtm-84: noop anticipatory [deadline] cfq
wtm-85: noop anticipatory [deadline] cfq
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;parallel-scale test simul passed in &lt;b&gt;13457s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/a154ee76-2680-11e3-b741-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/a154ee76-2680-11e3-b741-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;With the &lt;b&gt;&quot;noop&quot;&lt;/b&gt; I/O scheduler:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# pdsh -l root -S -w wtm-[84,85] &quot;cat /sys/block/sdb/queue/scheduler&quot;
wtm-84: [noop] anticipatory deadline cfq
wtm-85: [noop] anticipatory deadline cfq
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;parallel-scale test simul passed in &lt;b&gt;13589s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/dae61290-26a4-11e3-94b1-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/dae61290-26a4-11e3-94b1-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="67660" author="bzzz" created="Thu, 26 Sep 2013 10:11:50 +0000"  >&lt;p&gt;given no quick reply with the results, it&apos;s going to take another zillion seconds &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;


&lt;p&gt;would you mind running simul manually and grabbing /proc/spl/kstat/zfs/txgs-* a few times over the run?&lt;/p&gt;
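&lt;p&gt;(Something along these lines would do; a rough sketch, with the interval and count chosen arbitrarily:)&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# snapshot the per-pool txg history a few times while simul is running
for i in 1 2 3; do
    cat /proc/spl/kstat/zfs/txgs-* &gt; /tmp/txgs.$i.$(date +%s)
    sleep 600
done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>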
                            <comment id="67661" author="bzzz" created="Thu, 26 Sep 2013 10:15:08 +0000"  >&lt;p&gt;I guess it makes sense to specify zfs_txg_history=500 (or 1000) when you load zfs module.&lt;/p&gt;</comment>
                            <comment id="67663" author="yujian" created="Thu, 26 Sep 2013 10:27:34 +0000"  >&lt;blockquote&gt;&lt;p&gt;would you mind to run manually simul and grab /proc/spl/kstat/zfs/txgs-* few times over the run?&lt;/p&gt;&lt;/blockquote&gt;
&lt;blockquote&gt;&lt;p&gt;I guess it makes sense to specify zfs_txg_history=500 (or 1000) when you load zfs module.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;OK, let me do more experiments.&lt;/p&gt;</comment>
                            <comment id="67669" author="yujian" created="Thu, 26 Sep 2013 11:30:57 +0000"  >&lt;p&gt;Hi Alex,&lt;br/&gt;
Since the simul test is still running with the noop scheduler, I did not stop it; I gathered the /proc/spl/kstat/zfs/txgs-* data from both the MDS (wtm-84) and the OSS (wtm-85) three times. Please refer to the attachments.&lt;/p&gt;</comment>
                            <comment id="67670" author="bzzz" created="Thu, 26 Sep 2013 11:33:23 +0000"  >&lt;p&gt;thanks! learning ...&lt;/p&gt;</comment>
                            <comment id="67672" author="bzzz" created="Thu, 26 Sep 2013 11:40:42 +0000"  >&lt;p&gt;do I understand correctly that all OSTs are using the same physical device?&lt;/p&gt;</comment>
                            <comment id="67673" author="yujian" created="Thu, 26 Sep 2013 11:55:22 +0000"  >&lt;blockquote&gt;&lt;p&gt;do I understand correctly that all OSTs are using the same physical device?&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Yes.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;ost_HOST=wtm-85
OSTCOUNT=7
OSTSIZE=2097152
OSTDEV1=/dev/sdb5 OSTDEV2=/dev/sdb6 OSTDEV3=/dev/sdb7 OSTDEV4=/dev/sdb8 OSTDEV5=/dev/sdb9 OSTDEV6=/dev/sdb10 OSTDEV7=/dev/sdb11
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="67675" author="bzzz" created="Thu, 26 Sep 2013 11:58:00 +0000"  >&lt;p&gt;would it be possible to run simul using single OST on a non-shared phys.device?&lt;/p&gt;</comment>
                            <comment id="67677" author="yujian" created="Thu, 26 Sep 2013 12:10:13 +0000"  >&lt;blockquote&gt;&lt;p&gt;would it be possible to run simul using single OST on a non-shared phys.device?&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Sure. I can start a new test run now because the previous simul test run just finished. I&apos;m uploading the Maloo report.&lt;/p&gt;

&lt;p&gt;So, the new test run will be performed with a single OST, the deadline I/O scheduler, and zfs_txg_history=1000 set while loading the zfs module.&lt;/p&gt;</comment>
                            <comment id="67678" author="bzzz" created="Thu, 26 Sep 2013 12:11:28 +0000"  >&lt;p&gt;please, make sure that single OST uses a physical device nobody else uses (including MDT). so the idea is have one ZFS pool in use on that device and zero concurrency between different ZFS pools. thanks in advance.&lt;/p&gt;</comment>
                            <comment id="67684" author="yujian" created="Thu, 26 Sep 2013 13:49:20 +0000"  >&lt;p&gt;Lustre build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-master/1689/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-master/1689/&lt;/a&gt;&lt;br/&gt;
Distro/Arch: RHEL6.4/x86_64&lt;br/&gt;
FSTYPE=zfs&lt;/p&gt;

&lt;p&gt;Test configuration:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;mds_HOST=wtm-84 MDSCOUNT=1 MDSSIZE=2097152  MDSDEV1=/dev/sdc 
ost_HOST=wtm-85 OSTCOUNT=1 OSTSIZE=15000000 OSTDEV1=/dev/sdd
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# pdsh -l root -S -w wtm-84 &quot;cat /sys/block/sdc/queue/scheduler&quot;
wtm-84: [noop] anticipatory deadline cfq

# pdsh -l root -S -w wtm-85 &quot;cat /sys/block/sdd/queue/scheduler&quot;
wtm-85: [noop] anticipatory deadline cfq
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;parallel-scale test simul passed in &lt;b&gt;1950s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/55b02a2c-26b1-11e3-9d3a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/55b02a2c-26b1-11e3-9d3a-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="67685" author="bzzz" created="Thu, 26 Sep 2013 14:07:47 +0000"  >&lt;p&gt;hmm, sounds interesting.. any idea whether simul&apos;s workset depends on the space available? or probably it makes sense to return with OSTSIZE=$((6*2097152)) ?&lt;/p&gt;</comment>
                            <comment id="67707" author="yujian" created="Thu, 26 Sep 2013 15:37:41 +0000"  >&lt;p&gt;I think the performance of simul test does not depend on the available space on OST because no parameter is calculated from the available space on OST, and the simul test mainly does the open/close/unlink/rename/creat/truncate, etc. operations.&lt;br/&gt;
In the above test run, I used OSTSIZE=15000000 (about 14G). Just re-ran the test with OSTSIZE=$((6*2097152))=12582912 (about 12G) and it passed in &lt;b&gt;2095s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/e089858a-26c0-11e3-83d1-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/e089858a-26c0-11e3-83d1-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="67708" author="bzzz" created="Thu, 26 Sep 2013 15:42:28 +0000"  >&lt;p&gt;thanks a lot for help. it seems the issue is due to two factors: 1) expensive sync (to be learned yet) 2) high cost of competing of few pools on a same phys.device. I don&apos;t think anybody will be running few pools on a same device in wild..&lt;/p&gt;</comment>
                            <comment id="67711" author="yujian" created="Thu, 26 Sep 2013 16:18:17 +0000"  >&lt;p&gt;You&apos;re welcome, Alex.&lt;/p&gt;

&lt;p&gt;I just checked the autotest configuration and found the OST devices used by it were:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;OSTDEV1=/dev/lvm-OSS/P1
OSTDEV2=/dev/lvm-OSS/P2
OSTDEV3=/dev/lvm-OSS/P3
OSTDEV4=/dev/lvm-OSS/P4
OSTDEV5=/dev/lvm-OSS/P5
OSTDEV6=/dev/lvm-OSS/P6
OSTDEV7=/dev/lvm-OSS/P7
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It is likely that all of the LVs were created from the same PV, especially on virtual machines. Since most of the test nodes used by the autotest system are VMs, and ZFS full test runs are performed by autotest regularly, it&apos;s hard to bypass factor 2) above.&lt;/p&gt;</comment>
                            <comment id="67735" author="bzzz" created="Thu, 26 Sep 2013 17:37:54 +0000"  >&lt;p&gt;frankly I don&apos;t think we should be trying to improve ZFS for such an environment - it&apos;s not realistic to have many pools on a same device. solution to this isn&apos;t obvious either.&lt;/p&gt;</comment>
                            <comment id="67736" author="utopiabound" created="Thu, 26 Sep 2013 17:44:09 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/7778&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7778&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Skip simul test for ZFS, since it takes too long.&lt;/p&gt;</comment>
                            <comment id="67762" author="utopiabound" created="Thu, 26 Sep 2013 20:17:07 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/7780&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7780&lt;/a&gt; - FOR TEST ONLY patch to run parallel-scale with only 2 OSTs&lt;/p&gt;</comment>
                            <comment id="67796" author="yujian" created="Fri, 27 Sep 2013 07:04:42 +0000"  >&lt;p&gt;Hi Alex and Nathaniel,&lt;/p&gt;

&lt;p&gt;FYI, all of the following sub-tests run slowly on ZFS under the current test environment (7 OSTs sharing the same physical device):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sanityn:
    test 33a                    13949s

sanity-quota:
    test 2                      16905s
    test 12a                    2556s

replay-ost-single:
    test 8a                     2326s
    test 8b                     2290s

parallel-scale:
    test compilebench           6913s
    test simul                  8727s
    test connectathon           6880s
    test iorssf                 5570s
    test iorfpp                 5960s
    test write_append_truncate  13940s
    test write_disjoint         29272s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We&apos;d better not skip those tests for ZFS. We can improve the test scripts to reduce file sizes/counts, iteration counts, etc. so that the tests still run for ZFS within a reasonable time (less than 1800s with SLOW=yes).&lt;/p&gt;</comment>
                            <comment id="67797" author="bzzz" created="Fri, 27 Sep 2013 07:06:51 +0000"  >&lt;p&gt;can we reconfigure with OSTCOUNT=1 for parallel-scale specifically ?&lt;/p&gt;</comment>
                            <comment id="67798" author="yujian" created="Fri, 27 Sep 2013 07:37:50 +0000"  >&lt;blockquote&gt;&lt;p&gt;can we reconfigure with OSTCOUNT=1 for parallel-scale specifically ?&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;This needs changes to the autotest system. We can get the results of the parallel-scale test with SLOW=no first to see whether those sub-tests can pass within a reasonable time or not.&lt;/p&gt;</comment>
                            <comment id="67923" author="yujian" created="Sun, 29 Sep 2013 06:50:21 +0000"  >&lt;p&gt;In manual test run with SLOW=no and OSTCOUNT=7, parallel-scale test simul passed in &lt;b&gt;112s&lt;/b&gt;: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/4b220e1e-28c7-11e3-8951-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/4b220e1e-28c7-11e3-8951-52540035b04c&lt;/a&gt;&lt;br/&gt;
However, the following sub-tests still took very long time:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;compilebench    7760s
iorssf          5061s
iorfpp          5748s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Let&apos;s wait for the autotest test result in &lt;a href=&quot;http://review.whamcloud.com/7778&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7778&lt;/a&gt; to do a comparison.&lt;/p&gt;</comment>
                            <comment id="67939" author="yujian" created="Mon, 30 Sep 2013 13:54:47 +0000"  >&lt;blockquote&gt;&lt;p&gt;Let&apos;s wait for the autotest test result in &lt;a href=&quot;http://review.whamcloud.com/7778&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7778&lt;/a&gt; to do a comparison.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;In an autotest run with SLOW=no and OSTCOUNT=7, parallel-scale timed out: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/487e3fe6-29c3-11e3-b5ea-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/487e3fe6-29c3-11e3-b5ea-52540035b04c&lt;/a&gt;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;compilebench    6005s
metabench       14400s (TIMEOUT)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We still have to reduce the values of cbench_IDIRS, cbench_RUNS, mbench_NFILES, etc. for ZFS.&lt;/p&gt;
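&lt;p&gt;(A sketch of what that could look like for a manual run; the variable names are the ones above, but the values and the exact invocation are only illustrative:)&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# shrink the compilebench/metabench workloads for ZFS runs
cbench_IDIRS=2 cbench_RUNS=2 mbench_NFILES=10000 SLOW=no \
    sh lustre/tests/parallel-scale.sh
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>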
                            <comment id="67982" author="utopiabound" created="Mon, 30 Sep 2013 18:48:52 +0000"  >&lt;p&gt;From 7778 &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/487e3fe6-29c3-11e3-b5ea-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/487e3fe6-29c3-11e3-b5ea-52540035b04c&lt;/a&gt;&lt;br/&gt;
the metabench result seems very strange.&lt;/p&gt;

&lt;p&gt;metabench normally runs fairly quickly (~200-500s) judging by the results in maloo for ZFS runs.&lt;/p&gt;

&lt;p&gt;It looks like the whole system (client-30) just went out to lunch for 4 hours.&lt;/p&gt;</comment>
                            <comment id="68326" author="utopiabound" created="Thu, 3 Oct 2013 21:23:04 +0000"  >&lt;p&gt;Reduce performance expectation for ZFS in sanity-quota/0, lowest observed over last 4 weeks is ~150.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/7848&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7848&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="69141" author="adilger" created="Wed, 16 Oct 2013 17:13:16 +0000"  >&lt;p&gt;I would prefer the approach of setting OSTCOUNT=2 for ZFS-backed test filesystems.  This is more in line with real systems, since we will typically only have a single OST per OSS with ZFS instead of 4 or more OSTs per OSS with ldiskfs.  I think a lot of tests depend on having at least two OSTs, so OSTCOUNT=1 will probably cause some tests to be skipped.&lt;/p&gt;

&lt;p&gt;Another complementary approach would be to format a single ZFS pool across a few LVs and then have the different OST/MDT targets in their own datasets in the shared pool.  That would avoid the extra commits caused by having separate pools.  The drawback is that all of the datasets would store their files in the same space, so some of the Lustre tests would be broken if we don&apos;t add ZFS reservations for the minimum size of the datasets (e.g. filling one OST and then allocating objects to another OST would break).&lt;/p&gt;
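&lt;p&gt;(Roughly, for illustration only; the pool/dataset names and reservation sizes are made up and would need to match the intended minimum target sizes:)&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# one pool spanning the LVs, with each target in its own dataset
zpool create lustre-oss /dev/lvm-OSS/P1 /dev/lvm-OSS/P2 /dev/lvm-OSS/P3
zfs create lustre-oss/ost1
zfs create lustre-oss/ost2
# reserve a minimum size per dataset so filling one OST cannot starve another
zfs set reservation=2G lustre-oss/ost1
zfs set reservation=2G lustre-oss/ost2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>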
                            <comment id="69204" author="yujian" created="Thu, 17 Oct 2013 13:55:22 +0000"  >&lt;blockquote&gt;&lt;p&gt;I would prefer the approach of setting OSTCOUNT=2 for ZFS-backed test filesystems.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;I just created TEI-790 to ask the TEI team for help on this change.&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;Another complimentary approach would be to format a single ZFS pool across a few LVs and then have the different OST/MDT targets in their own datasets in the shared pool.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;This needs changes to mdsdevname(), ostdevname() and the failover testing support code for ZFS (&lt;a href=&quot;http://review.whamcloud.com/6429&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/6429&lt;/a&gt;) in test-framework.sh.&lt;/p&gt;</comment>
                            <comment id="70464" author="yujian" created="Fri, 1 Nov 2013 05:52:32 +0000"  >&lt;p&gt;Since TEI-790 was fixed, I&apos;ve triggered a full group test session on ZFS against Lustre b2_4 build #47. I&apos;ll vet the test results to see whether the timed-out tests can pass or not with OSTCOUNT=2 and SLOW=yes.&lt;/p&gt;</comment>
                            <comment id="70594" author="yujian" created="Mon, 4 Nov 2013 10:34:35 +0000"  >&lt;p&gt;Here is the test result on Lustre b2_4 build #47 with FSTYPE=zfs, OSTCOUNT=2 and SLOW=yes:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sessions/05b82736-444d-11e3-8472-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sessions/05b82736-444d-11e3-8472-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Timeout failures with OSTCOUNT=7 on the following sub-tests disappeared with OSTCOUNT=2:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sanity-benchmark     test bonnie        LU-1960
replay-ost-single    test 5             LU-2887
sanity-quota         test 7a and 12a    LU-2887
large-scale          test 3a            LU-2887
obdfilter-survey     test 1a            LU-2124
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;However, timeout failure on sanityn test 33a (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2829&quot; title=&quot;Timeout on sanityn test_33a: zfs slow when commit_on_sharing enabled&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2829&quot;&gt;&lt;del&gt;LU-2829&lt;/del&gt;&lt;/a&gt;) still occurred.&lt;/p&gt;

&lt;p&gt;And because of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3906&quot; title=&quot;Failure on test suite parallel-scale test_compilebench: IOError, No space left on device&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3906&quot;&gt;&lt;del&gt;LU-3906&lt;/del&gt;&lt;/a&gt; (out of space issue), the parallel-scale, parallel-scale-nfsv3 and parallel-scale-nfsv4 tests were not really run, so we do not know whether the timeout failures on them disappear or not with OSTCOUNT=2.&lt;/p&gt;</comment>
                            <comment id="71610" author="yujian" created="Fri, 15 Nov 2013 06:06:19 +0000"  >&lt;blockquote&gt;&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/7778&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7778&lt;/a&gt;&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;The above patch and &lt;a href=&quot;http://review.whamcloud.com/8234&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8234&lt;/a&gt; have landed on master branch. They are also needed on Lustre b2_4 and b2_5 branches.&lt;/p&gt;

&lt;p&gt;The two patches are combined and back-ported to Lustre b2_4 branch: &lt;a href=&quot;http://review.whamcloud.com/8284&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8284&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="72107" author="yujian" created="Fri, 22 Nov 2013 08:11:24 +0000"  >&lt;p&gt;Patch &lt;a href=&quot;http://review.whamcloud.com/8284&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8284&lt;/a&gt; landed on Lustre b2_4 branch. I&apos;ll trigger ZFS full group test session on Lustre b2_4 build #57 to check the test results. &lt;/p&gt;</comment>
                            <comment id="72290" author="yujian" created="Tue, 26 Nov 2013 08:04:19 +0000"  >&lt;p&gt;Lustre Build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b2_4/58/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-b2_4/58/&lt;/a&gt;&lt;br/&gt;
Distro/Arch: RHEL6.4/x86_64&lt;/p&gt;

&lt;p&gt;FSTYPE=zfs&lt;br/&gt;
MDSCOUNT=1&lt;br/&gt;
MDSSIZE=2097152&lt;br/&gt;
OSTCOUNT=2&lt;br/&gt;
OSTSIZE=2097152&lt;/p&gt;

&lt;p&gt;replay-ost-single test 5 still timed out: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/a76a6b78-5606-11e3-8e94-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/a76a6b78-5606-11e3-8e94-52540035b04c&lt;/a&gt;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;txg_sync      D 0000000000000000     0 32309      2 0x00000080
 ffff88004925dba0 0000000000000046 ffff8800ffffffff 00001e6701abaaf4
 000000005cd72ac8 ffff88007126d2f0 00000000001ec6e4 ffffffffad6d9703
 ffff880072f07ab8 ffff88004925dfd8 000000000000fb88 ffff880072f07ab8
Call Trace:
 [&amp;lt;ffffffff810a2431&amp;gt;] ? ktime_get_ts+0xb1/0xf0
 [&amp;lt;ffffffff8150ed93&amp;gt;] io_schedule+0x73/0xc0
 [&amp;lt;ffffffffa03e6d4c&amp;gt;] cv_wait_common+0x8c/0x100 [spl]
 [&amp;lt;ffffffff81096da0&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa03e3717&amp;gt;] ? taskq_dispatch_ent+0x57/0x110 [spl]
 [&amp;lt;ffffffffa03e6dd8&amp;gt;] __cv_wait_io+0x18/0x20 [spl]
 [&amp;lt;ffffffffa052939b&amp;gt;] zio_wait+0xfb/0x190 [zfs]
 [&amp;lt;ffffffffa04c1f2c&amp;gt;] dsl_pool_sync+0xec/0x540 [zfs]
 [&amp;lt;ffffffffa04da82e&amp;gt;] spa_sync+0x39e/0x970 [zfs]
 [&amp;lt;ffffffff8103b8d9&amp;gt;] ? kvm_clock_get_cycles+0x9/0x10
 [&amp;lt;ffffffffa04e582a&amp;gt;] txg_sync_thread+0x27a/0x4b0 [zfs]
 [&amp;lt;ffffffff810560a9&amp;gt;] ? set_user_nice+0xc9/0x130
 [&amp;lt;ffffffffa04e55b0&amp;gt;] ? txg_sync_thread+0x0/0x4b0 [zfs]
 [&amp;lt;ffffffffa03e2a3f&amp;gt;] thread_generic_wrapper+0x5f/0x70 [spl]
 [&amp;lt;ffffffffa03e29e0&amp;gt;] ? thread_generic_wrapper+0x0/0x70 [spl]
 [&amp;lt;ffffffff81096a36&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff810969a0&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="73551" author="yujian" created="Mon, 16 Dec 2013 05:10:09 +0000"  >&lt;p&gt;Lustre Build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b2_4/67/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-b2_4/67/&lt;/a&gt;&lt;br/&gt;
Distro/Arch: RHEL6.4/x86_64&lt;/p&gt;

&lt;p&gt;FSTYPE=zfs&lt;br/&gt;
MDSCOUNT=1&lt;br/&gt;
MDSSIZE=2097152&lt;br/&gt;
OSTCOUNT=2&lt;br/&gt;
OSTSIZE=8388608&lt;/p&gt;

&lt;p&gt;performance-sanity test 4 timed out after 28800s:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/68896e68-6579-11e3-8518-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/68896e68-6579-11e3-8518-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;parallel-scale test metabench timed out after 14400s:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/bdff6c44-6579-11e3-8518-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/bdff6c44-6579-11e3-8518-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="73935" author="pjones" created="Fri, 20 Dec 2013 15:10:21 +0000"  >&lt;p&gt;Landed for 2.4.2 and 2.6. Will land for 2.5.1 shortly.&lt;/p&gt;</comment>
                            <comment id="95400" author="adilger" created="Wed, 1 Oct 2014 08:13:45 +0000"  >&lt;p&gt;sanity-quota.sh test_12a is still being skipped on ZFS due to this bug, reopen until problem is actually fixed.  At a minimum, the skip should only be done in the case of VM nodes, or ZFS-on-LVM or something.&lt;/p&gt;</comment>
                            <comment id="128890" author="bzzz" created="Wed, 30 Sep 2015 14:06:12 +0000"  >&lt;p&gt;ZIL will improve this&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10120">
                    <name>Blocker</name>
                                                                <inwardlinks description="is blocked by">
                                        <issuelink>
            <issuekey id="21123">LU-4009</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="21440">LU-4108</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="16359">LU-2176</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="17708">LU-2872</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="18199">LU-3089</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="18535">LU-3225</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="13017">LU-2085</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="16003">LU-1960</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="17614">LU-2829</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="17711">LU-2874</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="18249">LU-3109</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="17632">LU-2836</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="25041">LU-5148</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="17736">LU-2891</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="17869">LU-2955</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="21293">LU-4072</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="22642">LU-4444</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="16296">LU-2124</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="24369">LU-4950</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="13549" name="txgs.1380193340.log" size="55819" author="yujian" created="Thu, 26 Sep 2013 11:30:57 +0000"/>
                            <attachment id="13550" name="txgs.1380193506.log" size="55819" author="yujian" created="Thu, 26 Sep 2013 11:30:57 +0000"/>
                            <attachment id="13551" name="txgs.1380194292.log" size="55819" author="yujian" created="Thu, 26 Sep 2013 11:30:57 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvk0v:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6968</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>