<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:24:30 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-9247] replay-ost-single test_5: test failed to respond and timed out</title>
                <link>https://jira.whamcloud.com/browse/LU-9247</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/afc7f4b0-0af4-11e7-8c9f-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/afc7f4b0-0af4-11e7-8c9f-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;It appears that zfs was hung and caused this timeout.  Here are a couple indications of this:&lt;/p&gt;

&lt;p&gt;test_log:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Starting ost1: lustre-ost1/ost1 /mnt/lustre-ost1
CMD: onyx-32vm8 mkdir -p /mnt/lustre-ost1; mount -t lustre lustre-ost1/ost1 /mnt/lustre-ost1
onyx-32vm8: e2label: No such file or directory while trying to open lustre-ost1/ost1
onyx-32vm8: Couldn&apos;t find valid filesystem superblock.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;OST console:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;10:35:06:[31399.498089] txg_sync        D 0000000000000001     0 27626      2 0x00000080
10:35:06:[31399.498090]  ffff880049607ac0 0000000000000046 ffff88003d98edd0 ffff880049607fd8
10:35:06:[31399.498091]  ffff880049607fd8 ffff880049607fd8 ffff88003d98edd0 ffff88007fc16c40
10:35:06:[31399.498092]  0000000000000000 7fffffffffffffff ffff88005ac587a8 0000000000000001
10:35:06:[31399.498092] Call Trace:
10:35:06:[31399.498093]  [&amp;lt;ffffffff8168bac9&amp;gt;] schedule+0x29/0x70
10:35:06:[31399.498095]  [&amp;lt;ffffffff81689519&amp;gt;] schedule_timeout+0x239/0x2d0
10:35:06:[31399.498096]  [&amp;lt;ffffffff810c4fe2&amp;gt;] ? default_wake_function+0x12/0x20
10:35:06:[31399.498098]  [&amp;lt;ffffffff810ba238&amp;gt;] ? __wake_up_common+0x58/0x90
10:35:06:[31399.498101]  [&amp;lt;ffffffff81060c1f&amp;gt;] ? kvm_clock_get_cycles+0x1f/0x30
10:35:06:[31399.498103]  [&amp;lt;ffffffff8168b06e&amp;gt;] io_schedule_timeout+0xae/0x130
10:35:06:[31399.498104]  [&amp;lt;ffffffff810b1416&amp;gt;] ? prepare_to_wait_exclusive+0x56/0x90
10:35:06:[31399.498106]  [&amp;lt;ffffffff8168b108&amp;gt;] io_schedule+0x18/0x20
10:35:06:[31399.498109]  [&amp;lt;ffffffffa0677617&amp;gt;] cv_wait_common+0xa7/0x130 [spl]
10:35:06:[31399.498111]  [&amp;lt;ffffffff810b1720&amp;gt;] ? wake_up_atomic_t+0x30/0x30
10:35:06:[31399.498114]  [&amp;lt;ffffffffa06776f8&amp;gt;] __cv_wait_io+0x18/0x20 [spl]
10:35:06:[31399.498150]  [&amp;lt;ffffffffa07d151b&amp;gt;] zio_wait+0x10b/0x1f0 [zfs]
10:35:06:[31399.498169]  [&amp;lt;ffffffffa075acdf&amp;gt;] dsl_pool_sync+0xbf/0x440 [zfs]
10:35:06:[31399.498187]  [&amp;lt;ffffffffa0775868&amp;gt;] spa_sync+0x388/0xb50 [zfs]
10:35:06:[31399.498189]  [&amp;lt;ffffffff810b174b&amp;gt;] ? autoremove_wake_function+0x2b/0x40
10:35:06:[31399.498191]  [&amp;lt;ffffffff81689c72&amp;gt;] ? mutex_lock+0x12/0x2f
10:35:06:[31399.498208]  [&amp;lt;ffffffffa07874e5&amp;gt;] txg_sync_thread+0x3c5/0x620 [zfs]
10:35:06:[31399.498226]  [&amp;lt;ffffffffa0787120&amp;gt;] ? txg_init+0x280/0x280 [zfs]
10:35:06:[31399.498229]  [&amp;lt;ffffffffa0672851&amp;gt;] thread_generic_wrapper+0x71/0x80 [spl]
10:35:06:[31399.498232]  [&amp;lt;ffffffffa06727e0&amp;gt;] ? __thread_exit+0x20/0x20 [spl]
10:35:06:[31399.498234]  [&amp;lt;ffffffff810b064f&amp;gt;] kthread+0xcf/0xe0
10:35:06:[31399.498235]  [&amp;lt;ffffffff810b0580&amp;gt;] ? kthread_create_on_node+0x140/0x140
10:35:06:[31399.498237]  [&amp;lt;ffffffff81696958&amp;gt;] ret_from_fork+0x58/0x90
10:35:06:[31399.498239]  [&amp;lt;ffffffff810b0580&amp;gt;] ? kthread_create_on_node+0x140/0x140
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>onyx-32vm1-8, Full Group test, &lt;br/&gt;
RHEL7.3/zfs, branch master, v2.9.54, b3541</environment>
        <key id="44983">LU-9247</key>
            <summary>replay-ost-single test_5: test failed to respond and timed out</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="jcasper">James Casper</reporter>
                        <labels>
                    </labels>
                <created>Thu, 23 Mar 2017 16:14:34 +0000</created>
                <updated>Thu, 16 Apr 2020 07:35:02 +0000</updated>
                            <resolved>Thu, 16 Apr 2020 07:35:02 +0000</resolved>
                                    <version>Lustre 2.10.0</version>
                    <version>Lustre 2.10.1</version>
                    <version>Lustre 2.11.0</version>
                    <version>Lustre 2.10.7</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="189798" author="sarah" created="Mon, 27 Mar 2017 21:05:13 +0000"  >&lt;p&gt;not sure if this is a dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4950&quot; title=&quot;sanity-benchmark test fsx hung: txg_sync was stuck on OSS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4950&quot;&gt;&lt;del&gt;LU-4950&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="189799" author="sarah" created="Mon, 27 Mar 2017 21:10:07 +0000"  >&lt;p&gt;another similar failure seen on sanity-quota on the same run&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/ddb7634c-0af4-11e7-8c9f-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/ddb7634c-0af4-11e7-8c9f-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;oss dmesg&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 6136.708017] txg_sync        D 0000000000000001     0 29681      2 0x00000080
[ 6136.709697]  ffff880058e87ac0 0000000000000046 ffff88005f6f5e20 ffff880058e87fd8
[ 6136.711425]  ffff880058e87fd8 ffff880058e87fd8 ffff88005f6f5e20 ffff88007fc16c40
[ 6136.713142]  0000000000000000 7fffffffffffffff ffff880061d92ec0 0000000000000001
[ 6136.714847] Call Trace:
[ 6136.716104]  [&amp;lt;ffffffff8168bac9&amp;gt;] schedule+0x29/0x70
[ 6136.717564]  [&amp;lt;ffffffff81689519&amp;gt;] schedule_timeout+0x239/0x2d0
[ 6136.719086]  [&amp;lt;ffffffff810c4fe2&amp;gt;] ? default_wake_function+0x12/0x20
[ 6136.720623]  [&amp;lt;ffffffff810ba238&amp;gt;] ? __wake_up_common+0x58/0x90
[ 6136.722131]  [&amp;lt;ffffffff81060c1f&amp;gt;] ? kvm_clock_get_cycles+0x1f/0x30
[ 6136.723666]  [&amp;lt;ffffffff8168b06e&amp;gt;] io_schedule_timeout+0xae/0x130
[ 6136.725197]  [&amp;lt;ffffffff810b1416&amp;gt;] ? prepare_to_wait_exclusive+0x56/0x90
[ 6136.726767]  [&amp;lt;ffffffff8168b108&amp;gt;] io_schedule+0x18/0x20
[ 6136.728231]  [&amp;lt;ffffffffa0670617&amp;gt;] cv_wait_common+0xa7/0x130 [spl]
[ 6136.729775]  [&amp;lt;ffffffff810b1720&amp;gt;] ? wake_up_atomic_t+0x30/0x30
[ 6136.731277]  [&amp;lt;ffffffffa06706f8&amp;gt;] __cv_wait_io+0x18/0x20 [spl]
[ 6136.732815]  [&amp;lt;ffffffffa07ca51b&amp;gt;] zio_wait+0x10b/0x1f0 [zfs]
[ 6136.734304]  [&amp;lt;ffffffffa0753edc&amp;gt;] dsl_pool_sync+0x2bc/0x440 [zfs]
[ 6136.735829]  [&amp;lt;ffffffffa076e868&amp;gt;] spa_sync+0x388/0xb50 [zfs]
[ 6136.737299]  [&amp;lt;ffffffff810b174b&amp;gt;] ? autoremove_wake_function+0x2b/0x40
[ 6136.738871]  [&amp;lt;ffffffff81689c72&amp;gt;] ? mutex_lock+0x12/0x2f
[ 6136.740337]  [&amp;lt;ffffffffa07804e5&amp;gt;] txg_sync_thread+0x3c5/0x620 [zfs]
[ 6136.741859]  [&amp;lt;ffffffffa0780120&amp;gt;] ? txg_init+0x280/0x280 [zfs]
[ 6136.743323]  [&amp;lt;ffffffffa066b851&amp;gt;] thread_generic_wrapper+0x71/0x80 [spl]
[ 6136.744868]  [&amp;lt;ffffffffa066b7e0&amp;gt;] ? __thread_exit+0x20/0x20 [spl]
[ 6136.746368]  [&amp;lt;ffffffff810b064f&amp;gt;] kthread+0xcf/0xe0
[ 6136.747868]  [&amp;lt;ffffffff810b0580&amp;gt;] ? kthread_create_on_node+0x140/0x140
[ 6136.749474]  [&amp;lt;ffffffff81696958&amp;gt;] ret_from_fork+0x58/0x90
[ 6136.750975]  [&amp;lt;ffffffff810b0580&amp;gt;] ? kthread_create_on_node+0x140/0x140
[ 6136.752560] inconsistency_v S 0000000000000000     0 29844      2 0x00000080
[ 6136.754234]  ffff8800696f3d98 0000000000000046 ffff88005f6f6dd0 ffff8800696f3fd8
[ 6136.755996]  ffff8800696f3fd8 ffff8800696f3fd8 ffff88005f6f6dd0 ffff88005c176b40
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="189800" author="sarah" created="Mon, 27 Mar 2017 21:21:42 +0000"  >&lt;p&gt;more instance.  This issue was suggested by Oleg marked as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8601&quot; title=&quot;sanity test_230d: Timeout on ZFS backed MDSs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8601&quot;&gt;&lt;del&gt;LU-8601&lt;/del&gt;&lt;/a&gt;,  are these 2 the dup?&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/e463d0ae-0af4-11e7-8c9f-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/e463d0ae-0af4-11e7-8c9f-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="191889" author="sarah" created="Thu, 13 Apr 2017 16:24:53 +0000"  >&lt;p&gt;keep seeing the same error on sanity-quota master zfs testing, and the error looks like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6812&quot; title=&quot;sanity-quota test_7a: test failed to respond and timed out&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6812&quot;&gt;&lt;del&gt;LU-6812&lt;/del&gt;&lt;/a&gt; which is marked as dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4950&quot; title=&quot;sanity-benchmark test fsx hung: txg_sync was stuck on OSS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4950&quot;&gt;&lt;del&gt;LU-4950&lt;/del&gt;&lt;/a&gt;.  &lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/5d38651e-1dae-11e7-9de9-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/5d38651e-1dae-11e7-9de9-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="192897" author="pjones" created="Thu, 20 Apr 2017 17:25:18 +0000"  >&lt;p&gt;Alex&lt;/p&gt;

&lt;p&gt;Does this appear to be a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4950&quot; title=&quot;sanity-benchmark test fsx hung: txg_sync was stuck on OSS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4950&quot;&gt;&lt;del&gt;LU-4950&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="193198" author="bzzz" created="Mon, 24 Apr 2017 13:11:47 +0000"  >&lt;p&gt;Peter, I&apos;m going through the logs, at at the moment it doesn&apos;t look like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4950&quot; title=&quot;sanity-benchmark test fsx hung: txg_sync was stuck on OSS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4950&quot;&gt;&lt;del&gt;LU-4950&lt;/del&gt;&lt;/a&gt; where one thread was spinning in ZFS failing to start transaction (too many blocks to release from a huge file).&lt;/p&gt;</comment>
                            <comment id="196233" author="casperjx" created="Wed, 17 May 2017 19:32:44 +0000"  >&lt;p&gt;This can also happen after an MDS failover.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/edde2a3e-9ae8-434a-8170-b64e9e85529c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/edde2a3e-9ae8-434a-8170-b64e9e85529c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;full group, CentOS7/zfs/DNE&lt;br/&gt;
master branch, v2.9.57, b3575&lt;/p&gt;

&lt;p&gt;From MDS1/3 dmesg:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[66569.880864] txg_sync        D 0000000000000001     0 19793      2 0x00000080
[66569.882578]  ffff880052433ae0 0000000000000046 ffff880063c1bec0 ffff880052433fd8
[66569.884423]  ffff880052433fd8 ffff880052433fd8 ffff880063c1bec0 ffff88007fc16c40
[66569.886169]  0000000000000000 7fffffffffffffff ffff880067d20c00 0000000000000001
[66569.887898] Call Trace:
[66569.889172]  [&amp;lt;ffffffff8168c3c9&amp;gt;] schedule+0x29/0x70
[66569.890664]  [&amp;lt;ffffffff81689e29&amp;gt;] schedule_timeout+0x239/0x2c0
[66569.892192]  [&amp;lt;ffffffff81060c1f&amp;gt;] ? kvm_clock_get_cycles+0x1f/0x30
[66569.893743]  [&amp;lt;ffffffff810eb08c&amp;gt;] ? ktime_get_ts64+0x4c/0xf0
[66569.895244]  [&amp;lt;ffffffff8168b96e&amp;gt;] io_schedule_timeout+0xae/0x130
[66569.896773]  [&amp;lt;ffffffff810b1816&amp;gt;] ? prepare_to_wait_exclusive+0x56/0x90
[66569.898348]  [&amp;lt;ffffffff8168ba08&amp;gt;] io_schedule+0x18/0x20
[66569.899836]  [&amp;lt;ffffffffa06ec617&amp;gt;] cv_wait_common+0xa7/0x130 [spl]
[66569.901382]  [&amp;lt;ffffffff810b1b20&amp;gt;] ? wake_up_atomic_t+0x30/0x30
[66569.902986]  [&amp;lt;ffffffffa06ec6f8&amp;gt;] __cv_wait_io+0x18/0x20 [spl]
[66569.904618]  [&amp;lt;ffffffffa0fd1cc3&amp;gt;] zio_wait+0xc3/0x140 [zfs]
[66569.906167]  [&amp;lt;ffffffffa0f5bfbf&amp;gt;] dsl_pool_sync+0xbf/0x440 [zfs]
[66569.907750]  [&amp;lt;ffffffffa0f763e8&amp;gt;] spa_sync+0x388/0xb50 [zfs]
[66569.909258]  [&amp;lt;ffffffff810b1b4b&amp;gt;] ? autoremove_wake_function+0x2b/0x40
[66569.910857]  [&amp;lt;ffffffff8168a572&amp;gt;] ? mutex_lock+0x12/0x2f
[66569.912439]  [&amp;lt;ffffffffa0f88065&amp;gt;] txg_sync_thread+0x3c5/0x620 [zfs]
[66569.914047]  [&amp;lt;ffffffffa0f87ca0&amp;gt;] ? txg_init+0x280/0x280 [zfs]
[66569.915586]  [&amp;lt;ffffffffa06e7851&amp;gt;] thread_generic_wrapper+0x71/0x80 [spl]
[66569.917185]  [&amp;lt;ffffffffa06e77e0&amp;gt;] ? __thread_exit+0x20/0x20 [spl]
[66569.918744]  [&amp;lt;ffffffff810b0a4f&amp;gt;] kthread+0xcf/0xe0
[66569.920183]  [&amp;lt;ffffffff810b0980&amp;gt;] ? kthread_create_on_node+0x140/0x140
[66569.921777]  [&amp;lt;ffffffff81697318&amp;gt;] ret_from_fork+0x58/0x90
[66569.923264]  [&amp;lt;ffffffff810b0980&amp;gt;] ? kthread_create_on_node+0x140/0x140
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="196354" author="bzzz" created="Thu, 18 May 2017 15:14:33 +0000"  >&lt;p&gt;please, tell the exact test you found the last trace.&lt;/p&gt;</comment>
                            <comment id="196357" author="casperjx" created="Thu, 18 May 2017 15:48:39 +0000"  >&lt;p&gt;Sorry about that: replay-single, test_70c&lt;/p&gt;</comment>
                            <comment id="196383" author="bzzz" created="Thu, 18 May 2017 19:00:45 +0000"  >&lt;p&gt;hm, I think it&apos;s important to notice that the stack itself seem to be valid - syncing thread was awaiting for I/O.&lt;/p&gt;</comment>
                            <comment id="206243" author="bzzz" created="Thu, 24 Aug 2017 13:24:53 +0000"  >&lt;p&gt;first of all, many reports marked with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9247&quot; title=&quot;replay-ost-single test_5: test failed to respond and timed out&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9247&quot;&gt;&lt;del&gt;LU-9247&lt;/del&gt;&lt;/a&gt; were runs with ldiskfs, so it&apos;s not ZFS specific.&lt;/p&gt;

&lt;p&gt;the client got stuck at dd:&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.740321&amp;#93;&lt;/span&gt; dd              S 0000000000000000     0 13435  12942 0x00000080&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.748089&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8168b679&amp;gt;&amp;#93;&lt;/span&gt; schedule+0x29/0x70&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.749516&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0827cfd&amp;gt;&amp;#93;&lt;/span&gt; cl_sync_io_wait+0x2ad/0x3c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.751062&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810c4ec0&amp;gt;&amp;#93;&lt;/span&gt; ? wake_up_state+0x20/0x20&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.752515&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0827f48&amp;gt;&amp;#93;&lt;/span&gt; cl_io_submit_sync+0x138/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.754107&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0cb2620&amp;gt;&amp;#93;&lt;/span&gt; vvp_io_write_commit+0x640/0x8d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.755663&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0cb2dfa&amp;gt;&amp;#93;&lt;/span&gt; vvp_io_write_start+0x54a/0x720 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.757211&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0826f95&amp;gt;&amp;#93;&lt;/span&gt; cl_io_start+0x65/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.758696&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0829335&amp;gt;&amp;#93;&lt;/span&gt; cl_io_loop+0xa5/0x190 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.760178&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c60cbf&amp;gt;&amp;#93;&lt;/span&gt; ll_file_io_generic+0x67f/0xb50 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.761706&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c6145d&amp;gt;&amp;#93;&lt;/span&gt; ll_file_aio_write+0x12d/0x1f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.763229&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c615ee&amp;gt;&amp;#93;&lt;/span&gt; ll_file_write+0xce/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:19:31:&lt;span class=&quot;error&quot;&gt;&amp;#91;33123.764716&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811fe12d&amp;gt;&amp;#93;&lt;/span&gt; vfs_write+0xbd/0x1e0&lt;/p&gt;

&lt;p&gt;it&apos;s trying to make sync IO but notransno mode has been set on OST, so sync IO can&apos;t commit.&lt;/p&gt;

&lt;p&gt;I&apos;m trying to understand why it needs to be sync..&lt;/p&gt;

&lt;p&gt;	/* out of quota, try sync write */&lt;br/&gt;
	if (rc == -EDQUOT &amp;amp;&amp;amp; !cl_io_is_mkwrite(io)) {&lt;br/&gt;
		rc = vvp_io_commit_sync(env, io, queue,&lt;br/&gt;
					vio-&amp;gt;u.write.vui_from,&lt;br/&gt;
					vio-&amp;gt;u.write.vui_to);&lt;/p&gt;
</comment>
                            <comment id="206748" author="gerrit" created="Tue, 29 Aug 2017 09:16:34 +0000"  >&lt;p&gt;Alex Zhuravlev (alexey.zhuravlev@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/28775&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28775&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9247&quot; title=&quot;replay-ost-single test_5: test failed to respond and timed out&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9247&quot;&gt;&lt;del&gt;LU-9247&lt;/del&gt;&lt;/a&gt; tests: additional debug information&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: efb1281e4a9f372725af689ecd3aa2002860c185&lt;/p&gt;</comment>
                            <comment id="207032" author="bzzz" created="Thu, 31 Aug 2017 09:12:08 +0000"  >&lt;p&gt;I think the reason is missing grant. there is a code in the test to get grants but it seem to fail, so dd gets stuck.&lt;br/&gt;
it would be good to land the patch above as I&apos;m unable to reproduce the failure manually. the changes suggested just&lt;br/&gt;
dump additional information about current grant, so it&apos;ll be easier to verify the theory.&lt;/p&gt;</comment>
                            <comment id="210828" author="casperjx" created="Wed, 11 Oct 2017 16:42:21 +0000"  >&lt;p&gt;This ticket should not be used for ldiskfs.  It was opened for OST consoles showing a hung txg_sync process.  I have only seen this process hang when ZFS is in the configuration.&lt;/p&gt;</comment>
                            <comment id="216669" author="jamesanunez" created="Tue, 19 Dec 2017 01:04:29 +0000"  >&lt;p&gt;Alex - Has this issue been fixed or is it understood or is it being addresses in another ticket? replay-ost-single test 5 still hangs frequently and there are several tickets open for test; &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9273&quot; title=&quot;replay-ost-single test_5: timeout after ost failover&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9273&quot;&gt;&lt;del&gt;LU-9273&lt;/del&gt;&lt;/a&gt;, &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5214&quot; title=&quot;Failure on test suite replay-ost-single test_5&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5214&quot;&gt;&lt;del&gt;LU-5214&lt;/del&gt;&lt;/a&gt;, &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10348&quot; title=&quot;replay-ost-single test 5 hangs during OST fail/recovery&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10348&quot;&gt;&lt;del&gt;LU-10348&lt;/del&gt;&lt;/a&gt;, and maybe others. Many of the dmesg and console logs for this failure don&apos;t have much information.&lt;/p&gt;</comment>
                            <comment id="216699" author="bzzz" created="Tue, 19 Dec 2017 07:12:16 +0000"  >&lt;p&gt;I&apos;ve seen many different failures with symptoms very similar to described in this ticket, so it&apos;s hard to say what has been fixed..&lt;/p&gt;</comment>
                            <comment id="223670" author="jamesanunez" created="Thu, 15 Mar 2018 03:15:15 +0000"  >&lt;p&gt;I think we are seeing this issues again. Please see the following for logs:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/d2b2a44c-2746-11e8-9e0e-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/d2b2a44c-2746-11e8-9e0e-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="24369">LU-4950</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="39743">LU-8601</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzz84v:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>