<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:20:03 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8729] conf-sanity test_84: FAIL: /dev/mapper/mds1_flakey failed to initialize!</title>
                <link>https://jira.whamcloud.com/browse/LU-8729</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;With patch &lt;a href=&quot;http://review.whamcloud.com/7200&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7200&lt;/a&gt; on master branch, conf-sanity test 84 failed as follows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;CMD: onyx-31vm7 e2label /dev/mapper/mds1_flakey 				2&amp;gt;/dev/null | grep -E &apos;:[a-zA-Z]{3}[0-9]{4}&apos;
Update not seen after 90s: wanted &apos;&apos; got &apos;lustre:MDT0000&apos;
 conf-sanity test_84: @@@@@@ FAIL: /dev/mapper/mds1_flakey failed to initialize! 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/e88a61c2-89bf-11e6-a8b7-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/e88a61c2-89bf-11e6-a8b7-5254006e85c2&lt;/a&gt;&lt;/p&gt;</description>
                <environment></environment>
        <key id="40800">LU-8729</key>
            <summary>conf-sanity test_84: FAIL: /dev/mapper/mds1_flakey failed to initialize!</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="hongchao.zhang">Hongchao Zhang</assignee>
                                    <reporter username="yujian">Jian Yu</reporter>
                        <labels>
                    </labels>
                <created>Wed, 19 Oct 2016 04:02:31 +0000</created>
                <updated>Thu, 14 Sep 2017 22:05:14 +0000</updated>
                            <resolved>Thu, 14 Sep 2017 22:05:14 +0000</resolved>
                                                    <fixVersion>Lustre 2.10.1</fixVersion>
                    <fixVersion>Lustre 2.11.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>12</watches>
                                                                            <comments>
                            <comment id="170264" author="yujian" created="Wed, 19 Oct 2016 04:03:49 +0000"  >&lt;p&gt;More failure instances:&lt;br/&gt;
replay-single test 0a: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/5fadea7c-8a19-11e6-91aa-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/5fadea7c-8a19-11e6-91aa-5254006e85c2&lt;/a&gt;&lt;br/&gt;
recovery-mds-scale: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/ad25283e-8bb2-11e6-91aa-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/ad25283e-8bb2-11e6-91aa-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="170303" author="gerrit" created="Wed, 19 Oct 2016 15:19:44 +0000"  >&lt;p&gt;Jian Yu (jian.yu@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/23251&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/23251&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8729&quot; title=&quot;conf-sanity test_84: FAIL: /dev/mapper/mds1_flakey failed to initialize!&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8729&quot;&gt;&lt;del&gt;LU-8729&lt;/del&gt;&lt;/a&gt; utils: improve lctl to add freeze and unfreeze operations&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: cb65ea32816a1df5ca23a11a681ff1db43d6cff9&lt;/p&gt;</comment>
                            <comment id="180716" author="yujian" created="Fri, 13 Jan 2017 03:50:50 +0000"  >&lt;p&gt;Hi Hongchao,&lt;/p&gt;

&lt;p&gt;I tested the dm-flakey patch on the latest master branch in &lt;a href=&quot;https://review.whamcloud.com/24733&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24733&lt;/a&gt; and found the following issues:&lt;/p&gt;

&lt;p&gt;On CentOS 6.8 distro, replay-single test 0c consistently hit the following errors on MDS:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LDISKFS-fs (dm-6): ldiskfs_check_descriptors: Checksum for group 4 failed (61905!=6807)
LDISKFS-fs (dm-6): group descriptors corrupted!
LustreError: 6423:0:(osd_handler.c:6825:osd_mount()) lustre-MDT0000-osd: can&apos;t mount /dev/mapper/mds1_flakey: -22
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;On CentOS 7.3 distro, conf-sanity test 84, replay-single tests 0a, 4b, 5, 18, 19 and replay-ost-single test 9 consistently hit the following errors on MDS or OSS:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: dmsetup load /dev/mapper/mds1_flakey --table &quot;0 4194304 flakey 252:0 0 0 1800 1 drop_writes&quot;
Lustre: DEBUG MARKER: dmsetup resume /dev/mapper/mds1_flakey
Buffer I/O error on dev dm-6, logical block 524272, async page read
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The &quot;Buffer I/O error&quot; didn&apos;t occur on CentOS 6.8 distro. It also didn&apos;t occur on CentOS 7.3 distro before patch set 21 for &lt;a href=&quot;https://review.whamcloud.com/7200&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/7200&lt;/a&gt;. The CentOS 7.3 kernel version for patch set 20 is &lt;tt&gt;3.10.0-327.36.3.el7&lt;/tt&gt;, and the kernel version for patch set 21 is &lt;tt&gt;3.10.0-514.el7&lt;/tt&gt;.&lt;/p&gt;

&lt;p&gt;Is there something wrong with dm-flakey in different kernels?&lt;/p&gt;</comment>
                            <comment id="181094" author="hongchao.zhang" created="Wed, 18 Jan 2017 09:20:38 +0000"  >&lt;p&gt;On my VM, I have setup dm-flakey manually on 7.3 (3.10.0-514.2.2.el7) and run some basic tests (say, creation, deletion,&lt;br/&gt;
copy etc) on it, and it showed no errors. I also looked at the code of dm-flakey.c and found no clue to the problem so far.&lt;br/&gt;
Will update the status of the ticket once there is any progress.&lt;/p&gt;</comment>
                            <comment id="184399" author="green" created="Fri, 10 Feb 2017 18:20:28 +0000"  >&lt;p&gt;This whole &apos;dmsetup load /dev/mapper/mds1_flakey --table &quot;0 4194304 flakey 252:0 0 0 1800 1 drop_writes&quot;&apos; this looks a bit suspicious to me.&lt;/p&gt;

&lt;p&gt;Looking into the kernel documentation, we can see:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Table parameters
----------------
  &amp;lt;dev path&amp;gt; &amp;lt;offset&amp;gt; &amp;lt;up interval&amp;gt; &amp;lt;down interval&amp;gt; \
    [&amp;lt;num_features&amp;gt; [&amp;lt;feature arguments&amp;gt;]]

Mandatory parameters:
    &amp;lt;dev path&amp;gt;: Full pathname to the underlying block-device, or a
                &quot;major:minor&quot; device-number.
    &amp;lt;offset&amp;gt;: Starting sector within the device.
    &amp;lt;up interval&amp;gt;: Number of seconds device is available.
    &amp;lt;down interval&amp;gt;: Number of seconds device returns errors.

Optional feature parameters:
  If no feature parameters are present, during the periods of
  unreliability, all I/O returns errors.

  drop_writes:
        All write I/O is silently ignored.
        Read I/O is handled correctly.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;According to this, &quot;1&quot; at the end of the table above is spurious? And if a target is unknown then it just causes io errors to be returned from the io - which seems to match what is seen on rhel 7.3?&lt;br/&gt;
There&apos;s a huge diff in devicemapper code between rhel 7.2 and 7.3, though I don&apos;t think the API ever had this extra number there.&lt;br/&gt;
Anyway I submitted the same patch we have now with just this table changed in &lt;a href=&quot;https://review.whamcloud.com/#/c/25387/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/25387/&lt;/a&gt; to see how well that would work, and it might be that&apos;s our fix in the end for newer distros.&lt;/p&gt;</comment>
                            <comment id="184403" author="green" created="Fri, 10 Feb 2017 18:27:57 +0000"  >&lt;p&gt;oh, I guess 1 is the &quot;num_features&quot; that I missed&lt;/p&gt;</comment>
                            <comment id="188490" author="adilger" created="Wed, 15 Mar 2017 17:31:48 +0000"  >&lt;p&gt;Jian, when is the last time this was tested? &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7481&quot; title=&quot;Failover: recovery-mds-scale test_failover_mds: /dev/lvm-Role_MDS/P1 failed to initialize!&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7481&quot;&gt;&lt;del&gt;LU-7481&lt;/del&gt;&lt;/a&gt; had a patch that was landed 2017-01-27 that fixed a related issue. &lt;/p&gt;</comment>
                            <comment id="188530" author="yujian" created="Wed, 15 Mar 2017 22:56:05 +0000"  >&lt;p&gt;Hi Andreas,&lt;br/&gt;
The last time was 2017-03-12 and the fix for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7481&quot; title=&quot;Failover: recovery-mds-scale test_failover_mds: /dev/lvm-Role_MDS/P1 failed to initialize!&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7481&quot;&gt;&lt;del&gt;LU-7481&lt;/del&gt;&lt;/a&gt; was included.&lt;br/&gt;
The consistent failures now are the errors in comment:&lt;br/&gt;
&lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-8729?focusedCommentId=180716&amp;amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-180716&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-8729?focusedCommentId=180716&amp;amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-180716&lt;/a&gt;&lt;/p&gt;
</comment>
                            <comment id="191620" author="yujian" created="Wed, 12 Apr 2017 00:12:38 +0000"  >&lt;p&gt;Hi Andreas and Oleg,&lt;/p&gt;

&lt;p&gt;The &quot;Buffer I/O error&quot; on the device-mapper flakey device is still blocking the testing of patch &lt;a href=&quot;https://review.whamcloud.com/7200&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/7200&lt;/a&gt;. Hongchao looked at the code of dm-flakey.c in CentOS 7.3 (kernel version 3.10.0-514.2.2.el7) but didn&apos;t find any clue to the problem.&lt;/p&gt;

&lt;p&gt;Could you please advise what we need to do to move this forward? Thank you.&lt;/p&gt;</comment>
                            <comment id="191687" author="simmonsja" created="Wed, 12 Apr 2017 15:44:48 +0000"  >&lt;p&gt;While testing I started to ask myself why we are using dm_flakey for every test. It appears this is only needed for recovery testing to replace lctl --device readonly. Should we look to reduce the scope of dm-flakey to only the tests that it is needed for?&lt;/p&gt;</comment>
                            <comment id="191780" author="simmonsja" created="Thu, 13 Apr 2017 00:17:50 +0000"  >&lt;p&gt;After working on this patch today I believe I got a handle on what is going on. So the &quot;Buffer I/O error&quot; are correct. So in the test you have:&lt;/p&gt;

&lt;p&gt;mount_client $MOUNT1 || error &quot;mount $MOUNT1 failed&quot;&lt;br/&gt;
 mount_client $MOUNT2 || error &quot;mount $MOUNT2 failed&quot;&lt;/p&gt;
&lt;p&gt;# make sure new superblock labels are sync&apos;d before disabling writes&lt;br/&gt;
 sync_all_data&lt;br/&gt;
 sleep 5&lt;/p&gt;


&lt;p&gt;replay_barrier $SINGLEMDS&lt;br/&gt;
 createmany -o $DIR1/$tfile-%d 1000&lt;/p&gt;

&lt;p&gt;This mounts and syncs all the data. Then replay_barrier, using the power of the flakey driver, sets the disk /dev/mapper/mds1_flakey to read only mode. &#160;The creation attempts by createmany on a read only file system are what generated the &quot;Buffer I/O errors&quot;. This is what is supposed to happen. Next is:&lt;/p&gt;

&lt;p&gt;do_facet $SINGLEMDS &quot;lctl set_param fail_loc=0x20000709 fail_val=5&quot;&lt;/p&gt;

&lt;p&gt;facet_failover --fsck $SINGLEMDS || error &quot;failover: $?&quot;&lt;/p&gt;

&lt;p&gt;The facet_failover part is what is important. Here we set a special flag to make sure e2fsck is run on the MDT. Looking at the Maloo logs:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_logs/2cb1e62c-1f0c-11e7-8920-5254006e85c2/show_text&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_logs/2cb1e62c-1f0c-11e7-8920-5254006e85c2/show_text&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;I noticed the error:&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;26120.388883&amp;#93;&lt;/span&gt; Lustre: DEBUG MARKER: /usr/sbin/lctl mark  conf-sanity test_84: \&lt;br/&gt;
@@@@@@ FAIL: e2fsck -d -v -t -t -f -n \/dev\/mapper\/mds1_flakey returned 8, \&lt;br/&gt;
should be \&amp;lt;= 4 &lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;26120.544691&amp;#93;&lt;/span&gt; Lustre: DEBUG MARKER: conf-sanity test_84: @@@@@@ FAIL: e2fsck \&lt;br/&gt;
-d -v -t -t -f -n /dev/mapper/mds1_flakey returned 8, should be &amp;lt;= 4&lt;/p&gt;

&lt;p&gt;Running e2fsck shouldn&apos;t work on a read only device, mds1_flakey. I fixed up&lt;br/&gt;
shutdown_facet which is called right before run_e2fsck in the function &lt;br/&gt;
failover_facet(). Now it should not only stop the device but remove the flakey&lt;br/&gt;
device and then e2fsck will be run on the native device which is not read only.&lt;br/&gt;
Now I see a different failure. It looks like MDT0000 is not supposed to recover&lt;br/&gt;
but it does. Will need to look into it.&lt;br/&gt;
&#160;&lt;/p&gt;</comment>
                            <comment id="191807" author="yujian" created="Thu, 13 Apr 2017 07:45:54 +0000"  >&lt;p&gt;Thank you very much James for helping debug this failure.&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;While testing I started to ask myself why we are using dm_flakey for every test. It appears this is only needed for recovery testing to replace lctl --device readonly. Should we look to reduce the scope of dm-flakey to only the tests that it is needed for?&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Besides recovery test suites (replay-single, replay-dual, etc), recovery testing also exists in other test suites (sanity, insanity, conf-sanity, etc). The changes will be a lot. What&apos;s more, we would like to make mount and unmount transparent, so that developers do not need to know they have to use a special option to create flakey devices while creating a recovery testing related sub-test.   &lt;/p&gt;</comment>
                            <comment id="193144" author="yujian" created="Sun, 23 Apr 2017 06:57:37 +0000"  >&lt;p&gt;After doing some experiments, I found out the commits that caused the &quot;Buffer I/O error&quot;. They are related to RHEL 7.3 support:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;commit adc9592d1820d5086e52e387008263b4dace9b0e
Author:     Christopher J. Morrone &amp;lt;morrone2@llnl.gov&amp;gt;
CommitDate: Fri Nov 18 15:35:18 2016 +0000

    LU-8534 ldiskfs: Add patch series for RHEL7.3    
    Reviewed-on: http://review.whamcloud.com/22113

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;commit 5763c175c9a1a330b664a01bc08f329b2bee54f7
Author:     Bob Glossman &amp;lt;bob.glossman@intel.com&amp;gt;
CommitDate: Fri Nov 18 15:35:26 2016 +0000

    LU-8796 kernel: kernel upgrade RHEL7.3 [3.10.0-514.el7]
    Reviewed-on: http://review.whamcloud.com/23560

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;In &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-684&quot; title=&quot;replace dev_rdonly kernel patch with dm-flakey&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-684&quot;&gt;&lt;del&gt;LU-684&lt;/del&gt;&lt;/a&gt; patch &lt;a href=&quot;https://review.whamcloud.com/7200&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/7200&lt;/a&gt;, replay-single test passed on patch set 20:&lt;br/&gt;
 &lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/c01176f8-a5ef-11e6-b605-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/c01176f8-a5ef-11e6-b605-5254006e85c2&lt;/a&gt;&lt;br/&gt;
 but failed on patch set 21 because of the above commits:&lt;br/&gt;
 &lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/41fe0cb2-beeb-11e6-92c6-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/41fe0cb2-beeb-11e6-92c6-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The error message on MDS console was:&lt;/p&gt;
&lt;div class=&quot;panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;panelContent&quot;&gt;
&lt;p&gt;Lustre: DEBUG MARKER: dmsetup suspend --nolockfs --noflush /dev/mapper/mds1_flakey&lt;br/&gt;
 Lustre: DEBUG MARKER: dmsetup load /dev/mapper/mds1_flakey --table &quot;0 4194304 flakey 252:0 0 0 1800 1 drop_writes&quot;&lt;br/&gt;
 Lustre: DEBUG MARKER: dmsetup resume /dev/mapper/mds1_flakey&lt;br/&gt;
 &lt;font color=&quot;#FF0000&quot;&gt;Buffer I/O error on dev dm-6, logical block 524272, async page read&lt;/font&gt;&lt;/p&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;In RHEL 7.3 kernel 3.10.0-514.el7, the above error message came from fs/buffer.c:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; void end_buffer_async_read(struct buffer_head *bh, &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; uptodate)
{
        &lt;span class=&quot;code-comment&quot;&gt;//&#8230;&#8230;
&lt;/span&gt;        page = bh-&amp;gt;b_page;
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (uptodate) {
                set_buffer_uptodate(bh);
        } &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt; {
                clear_buffer_uptodate(bh);
                buffer_io_error(bh, &lt;span class=&quot;code-quote&quot;&gt;&quot;, async page read&quot;&lt;/span&gt;);
                SetPageError(page);
        }
        &lt;span class=&quot;code-comment&quot;&gt;//&#8230;&#8230;
&lt;/span&gt;}

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="193936" author="adilger" created="Fri, 28 Apr 2017 18:33:35 +0000"  >&lt;p&gt;One option is to add a &lt;tt&gt;WARN_ON()&lt;/tt&gt; in &lt;tt&gt;end_buffer_async_read()&lt;/tt&gt;, so that this will dump the full stack.  Note that the callpath for &lt;tt&gt;end_buffer_async_read()&lt;/tt&gt; does not come from &lt;tt&gt;mark_buffer_async_read()&lt;/tt&gt;, that is just setting the &lt;tt&gt;bh-&amp;gt;b_end_io&lt;/tt&gt; pointer so that the &lt;tt&gt;end_buffer_async_read()&lt;/tt&gt; callback is called when the read from disk is actually finished.&lt;/p&gt;

&lt;p&gt;It is probably coming through:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;-&amp;gt;dmio_complete()
   bio.bi_error = error ? -EIO : 0;
or
-&amp;gt;bio_complete()
-&amp;gt;bio.bi_end_io = end_bio_bh_io_sync();
                 -&amp;gt;bh-&amp;gt;b_end_io(bh, !bio-&amp;gt;bi_error);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I do see some comments in &lt;tt&gt;dm-flakey.c&lt;/tt&gt; that might indicate problems if the &lt;tt&gt;DROP_WRITES&lt;/tt&gt; state is being changed:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;                } &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!test_bit(DROP_WRITES, &amp;amp;fc-&amp;gt;flags) &amp;amp;&amp;amp;
                           !test_bit(ERROR_WRITES, &amp;amp;fc-&amp;gt;flags)) {
                        /*
                         * Error read during the down_interval &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; drop_writes
                         * and error_writes were not configured.
                         */
                        &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; -EIO;
                }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;so that may be a result of changing the &lt;tt&gt;drop_writes&lt;/tt&gt; state in the middle of a read.  I see just before configuring the &lt;tt&gt;dm-flakey&lt;/tt&gt; device:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;dmsetup suspend --nolockfs --noflush /dev/mapper/mds1_flakey
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Does &quot;--noflush&quot; mean there may still be partially-completed reads?&lt;/p&gt;


&lt;p&gt;At a higher level, have we considered using the &lt;tt&gt;dm-flakey&lt;/tt&gt; device only for cases when &lt;tt&gt;lctl readonly&lt;/tt&gt; is actually used?  It appears that this is only called from &lt;tt&gt;replay_barrier()&lt;/tt&gt;, &lt;tt&gt;replay_barrier_nosync()&lt;/tt&gt;, and &lt;tt&gt;replay_barrier_nodf()&lt;/tt&gt;.  I see one call to &lt;tt&gt;replay_barrier&lt;/tt&gt; in &lt;tt&gt;conf-sanity.sh::test_84()&lt;/tt&gt;, but we might consider to move this test over to &lt;tt&gt;replay-single.sh&lt;/tt&gt;.  It is also worthwhile to note that &lt;tt&gt;dm-flakey&lt;/tt&gt; is only needed for ldiskfs, not for ZFS filesystems.&lt;/p&gt;</comment>
                            <comment id="194017" author="yujian" created="Mon, 1 May 2017 17:11:06 +0000"  >&lt;blockquote&gt;&lt;p&gt;One option is to add a WARN_ON() in end_buffer_async_read(), so that this will dump the full stack.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;The full stack is:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: dmsetup suspend --nolockfs --noflush /dev/mapper/mds1_flakey 
Lustre: DEBUG MARKER: dmsetup load /dev/mapper/mds1_flakey --table &quot;0 4194304 flakey 252:0 0 0 1800 1 drop_writes&quot; 
Lustre: DEBUG MARKER: dmsetup resume /dev/mapper/mds1_flakey 
Buffer I/O error on dev dm-6, logical block 524272, async page read 
------------[ cut here ]------------
WARNING: at fs/buffer.c:295 end_buffer_async_read+0x119/0x140()
Modules linked in: osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgs(OE) mgc(OE) osd_ldiskfs(OE) lquota(OE) fid(OE) fld(OE) ksocklnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) ldiskfs(OE) libcfs(OE) dm_flakey dm_mod rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod crc_t10dif crct10dif_generic ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core iosf_mbi crc32_pclmul ghash_clmulni_intel aesni_intel ppdev lrw gf128mul glue_helper ablk_helper cryptd pcspkr virtio_balloon i2c_piix4 parport_pc parport nfsd nfs_acl lockd grace auth_rpcgss sunrpc ip_tables ext4 mbcache jbd2 ata_generic pata_acpi virtio_blk cirrus 8139too drm_kms_helper crct10dif_pclmul crct10dif_common syscopyarea sysfillrect crc32c_intel serio_raw sysimgblt fb_sys_fops ttm ata_piix virtio_pci 8139cp virtio_ring mii virtio drm libata i2c_core floppy
CPU: 1 PID: 14902 Comm: systemd-udevd Tainted: G        W  OE  ------------   3.10.0-514.16.1.el7_lustre.x86_64 #1
Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007
 0000000000000000 0000000081bc25b0 ffff880064f4b990 ffffffff81686d2f
 ffff880064f4b9c8 ffffffff81085cb0 ffff8800642a7e00 ffff8800631e5068
 ffff8800642a7e00 ffffea00017e7640 ffff880077f24800 ffff880064f4b9d8
Call Trace:
 [&amp;lt;ffffffff81686d2f&amp;gt;] dump_stack+0x19/0x1b
 [&amp;lt;ffffffff81085cb0&amp;gt;] warn_slowpath_common+0x70/0xb0
 [&amp;lt;ffffffff81085dfa&amp;gt;] warn_slowpath_null+0x1a/0x20
 [&amp;lt;ffffffff81234269&amp;gt;] end_buffer_async_read+0x119/0x140
 [&amp;lt;ffffffff8123263c&amp;gt;] end_bio_bh_io_sync+0x2c/0x60
 [&amp;lt;ffffffff81239454&amp;gt;] bio_endio+0x64/0xa0
 [&amp;lt;ffffffffa063c8da&amp;gt;] dec_pending+0x18a/0x2e0 [dm_mod]
 [&amp;lt;ffffffffa063e359&amp;gt;] __split_and_process_bio+0x2d9/0x500 [dm_mod]
 [&amp;lt;ffffffff812e0000&amp;gt;] ? aes_encrypt+0xb30/0xe00 
 [&amp;lt;ffffffffa063e68e&amp;gt;] dm_make_request+0x10e/0x170 [dm_mod]
 [&amp;lt;ffffffff812eee19&amp;gt;] generic_make_request+0x109/0x1e0
 [&amp;lt;ffffffff812eef61&amp;gt;] submit_bio+0x71/0x150
 [&amp;lt;ffffffff812381dd&amp;gt;] ? bio_alloc_bioset+0x1fd/0x350
 [&amp;lt;ffffffff81233283&amp;gt;] _submit_bh+0x143/0x210
 [&amp;lt;ffffffff81235bd7&amp;gt;] block_read_full_page+0x217/0x350
 [&amp;lt;ffffffff81239670&amp;gt;] ? I_BDEV+0x10/0x10
 [&amp;lt;ffffffff8118e219&amp;gt;] ? force_page_cache_readahead+0x99/0xe0
 [&amp;lt;ffffffff8123a0a8&amp;gt;] blkdev_readpage+0x18/0x20
 [&amp;lt;ffffffff811829df&amp;gt;] generic_file_aio_read+0x3cf/0x790
 [&amp;lt;ffffffff8123a46c&amp;gt;] blkdev_aio_read+0x4c/0x70
 [&amp;lt;ffffffff811fde6d&amp;gt;] do_sync_read+0x8d/0xd0
 [&amp;lt;ffffffff811fe61e&amp;gt;] vfs_read+0x9e/0x170
 [&amp;lt;ffffffff811ff1ef&amp;gt;] SyS_read+0x7f/0xe0
 [&amp;lt;ffffffff816973c9&amp;gt;] system_call_fastpath+0x16/0x1b
---[ end trace 5808138721c1994b ]---
Lustre: DEBUG MARKER: /usr/sbin/lctl mark mds1 REPLAY BARRIER on lustre-MDT0000
Lustre: DEBUG MARKER: mds1 REPLAY BARRIER on lustre-MDT0000
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I&apos;m running the test again with &quot;--noflush&quot; removed.&lt;/p&gt;</comment>
                            <comment id="194102" author="yujian" created="Tue, 2 May 2017 00:16:41 +0000"  >&lt;p&gt;With &quot;--noflush&quot; removed, the full stack is:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: dmsetup suspend --nolockfs /dev/mapper/mds1_flakey
Lustre: DEBUG MARKER: dmsetup load /dev/mapper/mds1_flakey --table &quot;0 4194304 flakey 252:0 0 0 1800 1 drop_writes&quot;
Lustre: DEBUG MARKER: dmsetup resume /dev/mapper/mds1_flakey
Buffer I/O error on dev dm-6, logical block 524272, async page read
------------[ cut here ]------------
WARNING: at fs/buffer.c:295 end_buffer_async_read+0x119/0x140()
Modules linked in: osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgs(OE) mgc(OE) osd_ldiskfs(OE) lquota(OE) fid(OE) fld(OE) ksocklnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) ldiskfs(OE) libcfs(OE) dm_flakey dm_mod rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod crc_t10dif crct10dif_generic ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core ppdev iosf_mbi crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr virtio_balloon i2c_piix4 parport_pc parport nfsd nfs_acl lockd grace auth_rpcgss sunrpc ip_tables ext4 mbcache jbd2 ata_generic pata_acpi virtio_blk cirrus drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops crct10dif_pclmul 8139too crct10dif_common ttm crc32c_intel serio_raw virtio_pci 8139cp virtio_ring mii virtio drm ata_piix i2c_core libata floppy
CPU: 1 PID: 14904 Comm: systemd-udevd Tainted: G        W  OE  ------------   3.10.0-514.16.1.el7_lustre.x86_64 #1
Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007
 0000000000000000 00000000cc24690a ffff88007b733990 ffffffff81686d2f
 ffff88007b7339c8 ffffffff81085cb0 ffff88007a994f00 ffff88006290fa90
 ffff88007a994f00 ffffea0001ea3400 ffff880078178800 ffff88007b7339d8
Call Trace:
 [&amp;lt;ffffffff81686d2f&amp;gt;] dump_stack+0x19/0x1b
 [&amp;lt;ffffffff81085cb0&amp;gt;] warn_slowpath_common+0x70/0xb0
 [&amp;lt;ffffffff81085dfa&amp;gt;] warn_slowpath_null+0x1a/0x20
 [&amp;lt;ffffffff81234269&amp;gt;] end_buffer_async_read+0x119/0x140
 [&amp;lt;ffffffff8123263c&amp;gt;] end_bio_bh_io_sync+0x2c/0x60
 [&amp;lt;ffffffff81239454&amp;gt;] bio_endio+0x64/0xa0
 [&amp;lt;ffffffffa063b8da&amp;gt;] dec_pending+0x18a/0x2e0 [dm_mod]
 [&amp;lt;ffffffffa063d359&amp;gt;] __split_and_process_bio+0x2d9/0x500 [dm_mod]
 [&amp;lt;ffffffff812e0000&amp;gt;] ? aes_encrypt+0xb30/0xe00
 [&amp;lt;ffffffffa063d68e&amp;gt;] dm_make_request+0x10e/0x170 [dm_mod]
 [&amp;lt;ffffffff812eee19&amp;gt;] generic_make_request+0x109/0x1e0
 [&amp;lt;ffffffff812eef61&amp;gt;] submit_bio+0x71/0x150
 [&amp;lt;ffffffff812381dd&amp;gt;] ? bio_alloc_bioset+0x1fd/0x350
 [&amp;lt;ffffffff81233283&amp;gt;] _submit_bh+0x143/0x210
 [&amp;lt;ffffffff81235bd7&amp;gt;] block_read_full_page+0x217/0x350
 [&amp;lt;ffffffff81239670&amp;gt;] ? I_BDEV+0x10/0x10
 [&amp;lt;ffffffff8118e219&amp;gt;] ? force_page_cache_readahead+0x99/0xe0
 [&amp;lt;ffffffff8123a0a8&amp;gt;] blkdev_readpage+0x18/0x20
 [&amp;lt;ffffffff811829df&amp;gt;] generic_file_aio_read+0x3cf/0x790
 [&amp;lt;ffffffff8123a46c&amp;gt;] blkdev_aio_read+0x4c/0x70
 [&amp;lt;ffffffff811fde6d&amp;gt;] do_sync_read+0x8d/0xd0
 [&amp;lt;ffffffff811fe61e&amp;gt;] vfs_read+0x9e/0x170
 [&amp;lt;ffffffff811ff1ef&amp;gt;] SyS_read+0x7f/0xe0
 [&amp;lt;ffffffff816973c9&amp;gt;] system_call_fastpath+0x16/0x1b
---[ end trace 0cce06cbf33ea2b1 ]---
Lustre: DEBUG MARKER: /usr/sbin/lctl mark mds1 REPLAY BARRIER on lustre-MDT0000
LustreError: 12953:0:(osd_io.c:1460:osd_ldiskfs_read()) lustre-MDT0000: can&apos;t read 4096@0 on ino 265: rc = -5
LustreError: 12953:0:(osd_io.c:1460:osd_ldiskfs_read()) Skipped 6 previous similar messages
LustreError: 12953:0:(llog_osd.c:262:llog_osd_read_header()) lustre-MDT0000-osd: bad log  [0xa:0xb:0x0] header magic: 0x0 (expected 0x10645539)
LustreError: 12953:0:(llog_osd.c:262:llog_osd_read_header()) Skipped 6 previous similar messages
Lustre: DEBUG MARKER: mds1 REPLAY BARRIER on lustre-MDT0000
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="194414" author="yujian" created="Thu, 4 May 2017 07:15:02 +0000"  >&lt;p&gt;Hi Andreas,&lt;/p&gt;

&lt;p&gt;I also removed &quot;&amp;#45;&amp;#45;nolockfs&quot; and got the same failure and full stack. So, the failure is not related to &quot;&amp;#45;&amp;#45;nolockfs&quot; and/or &quot;&amp;#45;&amp;#45;noflush&quot; options.&lt;/p&gt;</comment>
                            <comment id="195628" author="yujian" created="Fri, 12 May 2017 05:32:39 +0000"  >&lt;p&gt;Hi Hongchao,&lt;/p&gt;

&lt;p&gt;Could you please proceed with this blocker? Thank you.&lt;/p&gt;</comment>
                            <comment id="197775" author="hongchao.zhang" created="Thu, 1 Jun 2017 06:50:53 +0000"  >&lt;p&gt;Status Update:&lt;/p&gt;

&lt;p&gt;I have tested it in Maloo by disabling different patches contained in &lt;a href=&quot;http://review.whamcloud.com/22113&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22113&lt;/a&gt; and&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/23560&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/23560&lt;/a&gt; (the debug patch is tracked at &lt;a href=&quot;https://review.whamcloud.com/#/c/26788/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/26788/&lt;/a&gt;, the patch set is 15 ~ 21).&lt;br/&gt;
The error &quot;Buffer I/O error&quot; still occurs.&lt;/p&gt;

&lt;p&gt;I looked at the test result in the original patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-684&quot; title=&quot;replace dev_rdonly kernel patch with dm-flakey&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-684&quot;&gt;&lt;del&gt;LU-684&lt;/del&gt;&lt;/a&gt; (&lt;a href=&quot;https://review.whamcloud.com/#/c/7200/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/7200/&lt;/a&gt;): there is no such problem up to&lt;br/&gt;
patch set 20, and it starts to occur from patch set 21. The difference in Lustre itself between the two patch sets is the two patches&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/22113&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22113&lt;/a&gt; and &lt;a href=&quot;http://review.whamcloud.com/23560&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/23560&lt;/a&gt;, but the bigger difference is the change of kernel:&lt;br/&gt;
patch set 20 still used RHEL7.2 to test, but patch set 21 began to use RHEL7.3.&lt;/p&gt;

&lt;p&gt;Patch Set 20: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/c01176f8-a5ef-11e6-b605-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/c01176f8-a5ef-11e6-b605-5254006e85c2&lt;/a&gt;   (3.10.0-327.36.3.el7.x86_64)&lt;br/&gt;
Patch Set 21: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/41fe0cb2-beeb-11e6-92c6-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/41fe0cb2-beeb-11e6-92c6-5254006e85c2&lt;/a&gt;   (3.10.0-514.el7.x86_64)&lt;/p&gt;

&lt;p&gt;Then the issue could be caused by the kernel RHEL7.3 (3.10.0-514.el7.x86_64)&lt;/p&gt;</comment>
                            <comment id="198620" author="hongchao.zhang" created="Thu, 8 Jun 2017 14:02:03 +0000"  >&lt;p&gt;I have set up RHEL7.3 (3.10.0-514.2.2.el7.x86_64) and reproduced the problem using ext4 and the dm-flakey module;&lt;br/&gt;
the reproducer is attached.&lt;/p&gt;</comment>
                            <comment id="198803" author="simmonsja" created="Sat, 10 Jun 2017 02:15:14 +0000"  >&lt;p&gt;Can you try linux-commit: 299f6230bc6d0ccd5f95bb0fb865d80a9c7d5ccc&lt;/p&gt;

&lt;p&gt;dm flakey: fix reads to be issued if drop_writes configured&lt;/p&gt;

&lt;p&gt;v4.8-rc3 commit 99f3c90d0d (&quot;dm flakey: error READ bios during the&lt;br/&gt;
down_interval&quot;) overlooked the &apos;drop_writes&apos; feature, which is meant to&lt;br/&gt;
allow reads to be issued rather than errored, during the down_interval.&lt;/p&gt;

&lt;p&gt;Fixes: 99f3c90d0d (&quot;dm flakey: error READ bios during the down_interval&quot;)&lt;br/&gt;
Reported-by: Qu Wenruo &amp;lt;quwenruo@cn.fujitsu.com&amp;gt;&lt;br/&gt;
Signed-off-by: Mike Snitzer &amp;lt;snitzer@redhat.com&amp;gt;&lt;br/&gt;
Cc: stable@vger.kernel.org&lt;/p&gt;</comment>
                            <comment id="198839" author="yujian" created="Sun, 11 Jun 2017 06:12:14 +0000"  >&lt;p&gt;Hi James,&lt;/p&gt;
&lt;blockquote&gt;&lt;p&gt;Can you try linux-commit: 299f6230bc6d0ccd5f95bb0fb865d80a9c7d5ccc&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;I tried this in &lt;a href=&quot;https://review.whamcloud.com/26788&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26788&lt;/a&gt; (patch set 25). The same error still occurred:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Buffer I/O error on dev dm-6, logical block 524272, async page read
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="199322" author="hongchao.zhang" created="Thu, 15 Jun 2017 11:19:20 +0000"  >&lt;p&gt;I have tested the commit locally, the commit can fix the problem on 3.10.0-514.2.2.el7.x86_64,&lt;br/&gt;
but can&apos;t on 3.10.0-514.21.1.el7.x86_64, which is being used in our Autotest system.&lt;/p&gt;</comment>
                            <comment id="199352" author="yujian" created="Thu, 15 Jun 2017 16:49:17 +0000"  >&lt;p&gt;The autotest system uses the build with the following kernel version supported by the latest master branch:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;$ head -n2 lustre/kernel_patches/targets/3.10-rhel7.target.in 
lnxmaj=&quot;3.10.0&quot;
lnxrel=&quot;514.21.1.el7&quot;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So, the commit doesn&apos;t work and the failure still exists.&lt;/p&gt;</comment>
                            <comment id="208435" author="simmonsja" created="Thu, 14 Sep 2017 21:23:10 +0000"  >&lt;p&gt;So the good news is this is fixed in RHEL7.4. If someone really wants to work on RHEL7.3, they can compare the sources in RHEL7.4 to RHEL7.3 to see which change fixed this problem so we can include the patch for RHEL7.3 if so desired. I updated the patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-684&quot; title=&quot;replace dev_rdonly kernel patch with dm-flakey&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-684&quot;&gt;&lt;del&gt;LU-684&lt;/del&gt;&lt;/a&gt; in the meantime to move this work forward.&lt;/p&gt;</comment>
                            <comment id="208441" author="pjones" created="Thu, 14 Sep 2017 22:05:14 +0000"  >&lt;p&gt;No - let&apos;s just focus on RHEL 7.4&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10120">
                    <name>Blocker</name>
                                            <outwardlinks description="is blocking">
                                        <issuelink>
            <issuekey id="11774">LU-684</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="33340">LU-7481</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="10111">LU-20</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="26944" name="reproducer.sh" size="590" author="hongchao.zhang" created="Thu, 8 Jun 2017 14:03:01 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzysgf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>