<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:07:19 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-468] md-raid corruptions for zero copy patch.</title>
                <link>https://jira.whamcloud.com/browse/LU-468</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While porting the zero copy patch to RHEL6 we found data corruption during IO while raid5/6 reconstruction was in progress. I think it should affect RHEL5 as well.&lt;/p&gt;

&lt;p&gt;It is easy to replicate with:&lt;/p&gt;

&lt;p&gt;echo 32 &amp;gt; /sys/block/md0/md/stripe_cache_size &lt;br/&gt;
echo 0 &amp;gt; /proc/fs/lustre/obdfilter/&amp;lt;ost_name&amp;gt;/writethrough_cache_enable &lt;br/&gt;
echo 0 &amp;gt; /proc/fs/lustre/obdfilter/&amp;lt;ost_name&amp;gt;/read_cache_enable &lt;/p&gt;

&lt;p&gt;and fail one of the disks with &lt;br/&gt;
mdadm /dev/mdX --fail /dev/....&lt;/p&gt;

&lt;p&gt;Afterwards, verify that the data is correct.&lt;/p&gt;


&lt;p&gt;[root@sjlustre1-o1 ~]# dd if=/dev/urandom of=test.1 oflag=direct bs=128k&lt;br/&gt;
count=8&lt;br/&gt;
8+0 records in&lt;br/&gt;
8+0 records out&lt;br/&gt;
1048576 bytes (1.0 MB) copied, 0.157819 seconds, 6.6 MB/s&lt;br/&gt;
[root@sjlustre1-o1 ~]# md5sum test.1&lt;br/&gt;
4ec4d0b67a2b3341795706605e0b0a28  test.1&lt;br/&gt;
[root@sjlustre1-o1 ~]# md5sum test.1 &amp;gt; test.1.md5&lt;br/&gt;
[root@sjlustre1-o1 ~]# dd if=test.1 iflag=direct of=/lustre/stry/test.1&lt;br/&gt;
oflag=direct bs=128k&lt;br/&gt;
8+0 records in&lt;br/&gt;
8+0 records out&lt;br/&gt;
1048576 bytes (1.0 MB) copied, 0.319458 seconds, 3.3 MB/s&lt;/p&gt;


&lt;p&gt;[root@sjlustre1-o1 ~]# dd if=/lustre/stry/test.1 iflag=direct of=test.2&lt;br/&gt;
oflag=direct bs=128k&lt;br/&gt;
8+0 records in&lt;br/&gt;
8+0 records out&lt;br/&gt;
1048576 bytes (1.0 MB) copied, 0.114691 seconds, 9.1 MB/s&lt;br/&gt;
[root@sjlustre1-o1 ~]# md5sum test.1 test.2&lt;br/&gt;
4ec4d0b67a2b3341795706605e0b0a28  test.1&lt;br/&gt;
426c976b75fa3ce5b5ae22b5195f85fd  test.2&lt;/p&gt;


&lt;p&gt;After investigation, the problem was identified as two bugs in the zcopy patch:&lt;br/&gt;
1) raid5 sets the UPTODATE flag on a stripe that still holds stale page pointers from DIO, and then tries to copy data from those pointers during the READ phase.&lt;/p&gt;

&lt;p&gt;2) an issue with restoring pages from the stripe cache.&lt;/p&gt;

&lt;p&gt;Please verify this issue in a RHEL5 environment (we do not have one at the moment).&lt;/p&gt;</description>
                <environment>RHEL6</environment>
        <key id="11246">LU-468</key>
            <summary>md-raid corruptions for zero copy patch.</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="rhenwood">Richard Henwood</assignee>
                                    <reporter username="shadow">Alexey Lyashkov</reporter>
                        <labels>
                    </labels>
                <created>Mon, 27 Jun 2011 03:06:45 +0000</created>
                <updated>Tue, 20 Nov 2012 14:10:03 +0000</updated>
                            <resolved>Tue, 26 Jul 2011 13:03:35 +0000</resolved>
                                    <version>Lustre 2.1.0</version>
                                    <fixVersion>Lustre 2.1.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>11</watches>
                                                                            <comments>
                            <comment id="17003" author="jay" created="Mon, 27 Jun 2011 13:57:29 +0000"  >&lt;p&gt;Hi Shadow, can you please show me the problematic code?&lt;/p&gt;</comment>
                            <comment id="17004" author="pjones" created="Mon, 27 Jun 2011 14:00:42 +0000"  >&lt;p&gt;Richard is going to try and repro this issue&lt;/p&gt;</comment>
                            <comment id="17005" author="shadow" created="Mon, 27 Jun 2011 14:52:57 +0000"  >&lt;p&gt;this is two patches which a fix issue for RHEL6 port.&lt;/p&gt;</comment>
                            <comment id="17083" author="pjones" created="Tue, 28 Jun 2011 11:06:01 +0000"  >&lt;p&gt;Alexey&lt;/p&gt;

&lt;p&gt;Could you please upload these patches into gerrit?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="17192" author="rhenwood" created="Thu, 30 Jun 2011 09:10:45 +0000"  >&lt;p&gt;I have been looking at this issue on CentOS 5.6, s/w raid on a sun machine. An initial attempt did not reproduce this issue, however there are a number of factors may be be in play and this result isn&apos;t conclusive.&lt;/p&gt;

&lt;p&gt;Work continues to reproduce on both RHEL5 and RHEL6. I am now reserving resources to more accurately identify the scope of this issue.&lt;/p&gt;</comment>
                            <comment id="17210" author="rhenwood" created="Fri, 1 Jul 2011 21:06:58 +0000"  >&lt;p&gt;Hi Alexey, &lt;/p&gt;

&lt;p&gt;I&apos;ve been working on this bug today. Can you clarify which kernel you used to get the corruption, including the zero copy patch?&lt;/p&gt;</comment>
                            <comment id="17457" author="rhenwood" created="Fri, 8 Jul 2011 11:22:51 +0000"  >&lt;p&gt;Alexey, &lt;/p&gt;

&lt;p&gt;can you please review the steps I&apos;m taking (below) to verify that I&apos;m not missing something when trying to reproduce this issue.&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;h3&gt;&lt;a name=&quot;Provisioningatestmachine&quot;&gt;&lt;/a&gt;Provisioning a test machine&lt;/h3&gt;

&lt;ul&gt;
	&lt;li&gt;Provision AMD64 with CentOS 5.6 &lt;a href=&quot;http://newbuild.whamcloud.com/job/lustre-master/arch=x86_64,build_type=server,distro=el5,ib_stack=inkernel/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;latest build&lt;/a&gt;&lt;/li&gt;
	&lt;li&gt;Slice up &lt;tt&gt;sdb&lt;/tt&gt; into 10G chunks: One extended partition over the whole drive, sliced up into six 10G logical partitions.&lt;/li&gt;
&lt;/ul&gt;


&lt;h3&gt;&lt;a name=&quot;BuildingaRAID5set&quot;&gt;&lt;/a&gt;Building a RAID 5 set&lt;/h3&gt;

&lt;p&gt;Taken from &lt;a href=&quot;https://raid.wiki.kernel.org/index.php/RAID_setup&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;RAID wiki&lt;/a&gt;&lt;/p&gt;

&lt;ol&gt;
	&lt;li&gt;Load the raid 5 module:
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# modprobe raid456
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
	&lt;li&gt;Create the raid array:
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;mdadm --create --verbose /dev/md0 --chunk=64 --level=5 --raid-devices=3 /dev/sdb5 /dev/sdb6 /dev/sdb7
mdadm --create --verbose /dev/md127 --chunk=64 --level=5 --raid-devices=3 /dev/sdb8 /dev/sdb9 /dev/sdb10
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
&lt;/ol&gt;



&lt;h3&gt;&lt;a name=&quot;BuildaLustrefilesystemonthemd0device.&quot;&gt;&lt;/a&gt;Build a Lustre filesystem on the md0 device.&lt;/h3&gt;

&lt;ol&gt;
	&lt;li&gt;Run the following:
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;OSTDEV1=&lt;span class=&quot;code-quote&quot;&gt;&quot;/dev/md0&quot;&lt;/span&gt; OSTDEV2=&lt;span class=&quot;code-quote&quot;&gt;&quot;/dev/md127&quot;&lt;/span&gt; /usr/lib64/lustre/tests/llmount.sh
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;The Lustre fs is now available at &lt;tt&gt;/mnt/lustre/&lt;/tt&gt;&lt;/p&gt;


&lt;h3&gt;&lt;a name=&quot;FollowingLU468&quot;&gt;&lt;/a&gt;Following &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-468&quot; title=&quot;md-raid corruptions for zero copy patch.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-468&quot;&gt;&lt;del&gt;LU-468&lt;/del&gt;&lt;/a&gt;&lt;/h3&gt;

&lt;ol&gt;
	&lt;li&gt;Fix the stripe cache size and disable writethrough/read caching (a quick sanity check follows this list):
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;echo 32 &amp;gt; /sys/block/md0/md/stripe_cache_size
echo 32 &amp;gt; /sys/block/md127/md/stripe_cache_size
echo 0 &amp;gt; /proc/fs/lustre/obdfilter/lustre-OST0000/writethrough_cache_enable
echo 0 &amp;gt; /proc/fs/lustre/obdfilter/lustre-OST0001/writethrough_cache_enable
echo 0 &amp;gt; /proc/fs/lustre/obdfilter/lustre-OST0000/read_cache_enable
echo 0 &amp;gt; /proc/fs/lustre/obdfilter/lustre-OST0001/read_cache_enable
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
&lt;/ol&gt;
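
&lt;p&gt;(Sanity check, a minimal sketch: read the same files back to confirm the settings took effect.)&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# confirm the cache settings took effect
cat /sys/block/md0/md/stripe_cache_size /sys/block/md127/md/stripe_cache_size
cat /proc/fs/lustre/obdfilter/lustre-OST0000/writethrough_cache_enable
cat /proc/fs/lustre/obdfilter/lustre-OST0000/read_cache_enable
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;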


&lt;h4&gt;&lt;a name=&quot;Createafile%2CnotontheLustrefs%3A&quot;&gt;&lt;/a&gt;Create a file, not on the Lustre fs:&lt;/h4&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/dev/urandom of=/root/test.1 oflag=direct bs=128k count=8
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;result:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;8+0 records in
8+0 records out
1048576 bytes (1.0 MB) copied, 0.23174 seconds, 4.5 MB/s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;h4&gt;&lt;a name=&quot;md5sum%7B%7B%2Froot%2Ftest.1%7D%7D&quot;&gt;&lt;/a&gt;md5sum &lt;tt&gt;/root/test.1&lt;/tt&gt;&lt;/h4&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# md5sum test.1
2cb6571392d5ba2e0bd34e3a33f35a43  test.1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;h4&gt;&lt;a name=&quot;ddthefileontotheLustrefs%3A&quot;&gt;&lt;/a&gt;dd the file onto the Lustre fs:&lt;/h4&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/root/test.1 of=/mnt/lustre/test.1 oflag=direct bs=128k
2048+0 records in
2048+0 records out
1048576 bytes (1.0 MB) copied, 0.029338 seconds, 45.5 MB/s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;h4&gt;&lt;a name=&quot;Failadriveineacharraytobeonthesafeside%3A&quot;&gt;&lt;/a&gt;Fail a drive in each array to be on the safe side:&lt;/h4&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;mdadm /dev/md0 --fail /dev/sdb7
mdadm /dev/md127 --fail /dev/sdb10
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;h4&gt;&lt;a name=&quot;ddthefileofftheLustrefs%3A&quot;&gt;&lt;/a&gt;dd the file off the Lustre fs:&lt;/h4&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/mnt/lustre/test.1 iflag=direct of=/root/test.2 oflag=direct bs=128k
8+0 records in
8+0 records out
1048576 bytes (1.0 MB) copied, 0.012411 seconds, 84.5 MB/s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;h4&gt;&lt;a name=&quot;md5sumthe%7B%7Btest.1%7D%7Dand%7B%7Btest.2%7D%7D&quot;&gt;&lt;/a&gt;md5sum the &lt;tt&gt;test.1&lt;/tt&gt; and &lt;tt&gt;test.2&lt;/tt&gt;&lt;/h4&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# md5sum test.1 test.2
2cb6571392d5ba2e0bd34e3a33f35a43  test.1
2cb6571392d5ba2e0bd34e3a33f35a43  test.2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;h3&gt;&lt;a name=&quot;Additionalinfo%28observedattheendoftest%29.&quot;&gt;&lt;/a&gt;Additional info (observed at the end of test).&lt;/h3&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# lfs getstripe /mnt/lustre/test.1 
/mnt/lustre/test.1
lmm_stripe_count:   1
lmm_stripe_size:    1048576
lmm_stripe_offset:  1
	obdidx		 objid		objid		 group
	     1	             3	          0x3	             0

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# mount
/dev/sda1 on / type ext3 (rw)
proc on /proc type proc (rw)
sysfs on /sys type sysfs (rw)
devpts on /dev/pts type devpts (rw,gid=5,mode=620)
tmpfs on /dev/shm type tmpfs (rw)
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
sunrpc on /&lt;span class=&quot;code-keyword&quot;&gt;var&lt;/span&gt;/lib/nfs/rpc_pipefs type rpc_pipefs (rw)
/dev/loop0 on /mnt/mds1 type lustre (rw,user_xattr,acl)
/dev/loop1 on /mnt/ost1 type lustre (rw)
/dev/loop2 on /mnt/ost2 type lustre (rw)
fat-amd-2.lab.whamcloud.com@tcp:/lustre on /mnt/lustre type lustre (rw,user_xattr,acl,flock)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# losetup /dev/loop1
/dev/loop1: [0011]:6353 (/dev/md0)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# cat /proc/mdstat 
Personalities : [raid6] [raid5] [raid4] 
md127 : active raid5 sdb10[3](F) sdb9[1] sdb8[0]
      19550848 blocks level 5, 64k chunk, algorithm 2 [3/2] [UU_]
		in: 217 reads, 9466 writes; out: 17107851 reads, 4902113 writes
		14669311 in raid5d, 156389 out of stripes, 22010562 handle called
		reads: 269 &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; rmw, 407 &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; rcw. zcopy writes: 0, copied writes: 9466
		0 delayed, 0 bit delayed, 0 active, queues: 2 in, 0 out
		0 expanding overlap

      
md0 : active raid5 sdb7[3](F) sdb6[1] sdb5[0]
      19550848 blocks level 5, 64k chunk, algorithm 2 [3/2] [UU_]
		in: 321 reads, 17595 writes; out: 17108767 reads, 4914447 writes
		14674850 in raid5d, 156647 out of stripes, 22024334 handle called
		reads: 677 &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; rmw, 824 &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; rcw. zcopy writes: 0, copied writes: 17595
		0 delayed, 0 bit delayed, 0 active, queues: 4 in, 0 out
		0 expanding overlap

      
unused devices: &amp;lt;none&amp;gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="17573" author="pjones" created="Mon, 11 Jul 2011 08:29:45 +0000"  >&lt;p&gt;Vitaly&lt;/p&gt;

&lt;p&gt;It seems that Alexey is unavailable to answer even the simplest question about this ticket so that we can establish the scope of the issue and whether it impacts RHEL5 or not. Are you able to assist in this matter? If not, could you please advise who at Xyratex could?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="17584" author="ericm" created="Mon, 11 Jul 2011 14:26:30 +0000"  >&lt;p&gt;Richard, there&apos;s several issues in your test:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;In your test io size is 128K. In that case, when you create md array, you should specify chunk size as 128K or less.&lt;/li&gt;
	&lt;li&gt;When writing file on Lustre, you should use &quot;oflag=direct&quot; instead of iflag.&lt;/li&gt;
	&lt;li&gt;Before reading file back from Lustre, you should fail a drive at first.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;I&apos;ve no idea whether RHEL5 have this problem (RHEL5 is different from RHEL6 in MD sense, I didn&apos;t check details of that). I tend to think the bugs are introduced in by porting patch to RHEL6. So if in the end you can&apos;t reproduce this on RHEL5, that probably means RHEL5 is safe.&lt;/p&gt;</comment>
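
&lt;p&gt;Putting those three corrections together, a minimal sketch of the adjusted sequence (device names and paths assumed from the reproducer above):&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# chunk size must not exceed the 128K IO size (mdadm --chunk is in KiB)
mdadm --create --verbose /dev/md0 --chunk=128 --level=5 --raid-devices=3 /dev/sdb5 /dev/sdb6 /dev/sdb7
# write to Lustre with oflag=direct, not iflag
dd if=/root/test.1 of=/mnt/lustre/test.1 oflag=direct bs=128k
# fail a drive BEFORE reading the file back
mdadm /dev/md0 --fail /dev/sdb7
dd if=/mnt/lustre/test.1 iflag=direct of=/root/test.2 bs=128k
md5sum /root/test.1 /root/test.2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>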
                            <comment id="17587" author="ericm" created="Mon, 11 Jul 2011 16:34:26 +0000"  >&lt;p&gt;Richard, you updated your previous comments, do you mean you did the right steps but still can&apos;t reproduce it?&lt;/p&gt;</comment>
                            <comment id="17588" author="rhenwood" created="Mon, 11 Jul 2011 16:59:07 +0000"  >&lt;p&gt;I&apos;ve update the reproducer above to include Eric&apos;s suggestions.&lt;/p&gt;

&lt;p&gt;I am not able to reproduce this on RHEL5.&lt;/p&gt;

&lt;p&gt;However, I&apos;m reluctant to assert that this isn&apos;t a problem with RHEL5 as the above reproducer does not reproduce on RHEL6.&lt;/p&gt;

&lt;p&gt;I would appreciate further feedback on the reproducer, maybe Oleg can comment?&lt;/p&gt;</comment>
                            <comment id="17589" author="ericm" created="Mon, 11 Jul 2011 17:10:00 +0000"  >&lt;p&gt;I noticed in your /proc/mdstat output, the zcopy write account is 0. So there&apos;s actually no zerocopy write happened in your test. I&apos;m not sure why...&lt;/p&gt;

&lt;p&gt;I don&apos;t know whether zcopy in RHEL5 works the same way as in RHEL6. But one thing you can try is using bs=256K in the dd write, which generate full stripe write (with 128K chunk size). I&apos;ve no idea other than this. If you managed to get zcopy write and no data corruption, then RHEL5 is probably fine.&lt;/p&gt;</comment>
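
&lt;p&gt;For example (paths assumed from the steps above; with a 128K chunk on a 3-disk raid5, two data chunks make a 256K full stripe):&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# full-stripe direct writes: bs equals the 256K stripe data width
dd if=/root/test.1 of=/mnt/lustre/test.1 oflag=direct bs=256k count=4
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>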
                            <comment id="17590" author="rhenwood" created="Mon, 11 Jul 2011 17:43:21 +0000"  >&lt;p&gt;Thanks for the suggestions;&lt;/p&gt;

&lt;p&gt;I&apos;ve tried the 256k block size, and also increased the size of the file being shifted around.&lt;/p&gt;

&lt;p&gt;The zcopy value stayed at 0.&lt;/p&gt;

&lt;p&gt;These changes also did not reproduce the bug on my RHEL6. Can you confirm that the reproducer above reproduces on your RHEL6 testbed?&lt;/p&gt;</comment>
                            <comment id="17592" author="ericm" created="Mon, 11 Jul 2011 18:20:52 +0000"  >&lt;p&gt;Did you actually run Lustre on top of raid? Because I noticed following lines of mount:&lt;/p&gt;

&lt;p&gt;/dev/loop1 on /mnt/ost1 type lustre (rw)&lt;br/&gt;
/dev/loop2 on /mnt/ost2 type lustre (rw)&lt;/p&gt;</comment>
                            <comment id="17593" author="rhenwood" created="Mon, 11 Jul 2011 18:29:51 +0000"  >&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# losetup /dev/loop1
/dev/loop1: [0011]:6353 (/dev/md0)
# losetup /dev/loop2
/dev/loop2: [0011]:13978 (/dev/md127)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I&apos;m reading this as the loop devices being backed by the md devices.&lt;/p&gt;</comment>
                            <comment id="17599" author="shadow" created="Tue, 12 Jul 2011 01:01:08 +0000"  >&lt;p&gt;2Peter: yes, i&apos;m busy with different issue, that will be reported later.&lt;br/&gt;
OOM killer can kill OST_IO threads that is a block client to reconnect until node reboot.&lt;/p&gt;

&lt;p&gt;2Richard: looks you forget to clear OST_MOUNT_OPTS / MDS_MOUNT_OPTS.&lt;/p&gt;</comment>
                            <comment id="17614" author="rhenwood" created="Tue, 12 Jul 2011 16:24:44 +0000"  >&lt;p&gt;Hi Alexey, I have tried clearing OST_MOUNT_OPTS / MDS_MOUNT_OPTS as you suggest.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;OSTDEV1=&quot;/dev/md0&quot; OSTDEV2=&quot;/dev/md127&quot; OST_MOUNT_OPTS=&quot;&quot; MDS_MOUNT_OPTS=&quot;&quot; /usr/lib64/lustre/tests/llmount.sh
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;No difference.&lt;/p&gt;</comment>
                            <comment id="17615" author="ericm" created="Tue, 12 Jul 2011 16:52:45 +0000"  >&lt;p&gt;Richard, I think firstly you need to figure out why there&apos;s no zerocopy write happened on RHEL6, then move to RHEL5. I don&apos;t know your exact environment, maybe you should consult MD expert in WC.&lt;/p&gt;</comment>
                            <comment id="17621" author="rhenwood" created="Tue, 12 Jul 2011 17:52:31 +0000"  >&lt;p&gt;llmount.sh uses loopback devices (even with clearing OST_MOUNT_OPTS/MDS_MOUNT_OPTS as suggested by Alexey.) These devices create indirection that may obscure the problem.&lt;/p&gt;

&lt;p&gt;As an alternative to llmount.sh I&apos;m manually creating the filesystem. I have used the following steps on RHEL5 and my RHEL6. I have been unable to recreate the bug reliably. As you suggest, I am working on a method to predictably perform zerocopy writes.&lt;/p&gt;

&lt;p&gt;Eric, can you run these commands in your RHEL6 environment to confirm that these instructions reproduce this bug on RHEL6?&lt;/p&gt;

&lt;h3&gt;&lt;a name=&quot;CreateaMDdevice.&quot;&gt;&lt;/a&gt;Create a MD device.&lt;/h3&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;mdadm --create --verbose /dev/md0 --chunk=64 --level=5 --raid-devices=3 /dev/sdb5 /dev/sdb6 /dev/sdb7
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;h3&gt;&lt;a name=&quot;CreateMDS%2FMDTandmount.&quot;&gt;&lt;/a&gt;Create MDS/MDT and mount.&lt;/h3&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# mkfs.lustre --fsname=temp --mgs --mdt /dev/sdb11
...
# mount -t lustre /dev/sdb11 /mnt/mdt
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;h3&gt;&lt;a name=&quot;CreateOSTontheMDdeviceandmountonOSS.&quot;&gt;&lt;/a&gt;Create OST on the MD device and mount on OSS.&lt;/h3&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# mkfs.lustre --ost --fsname=temp --mgsnode=10.0.0.1@tcp0 /dev/md0
...
# mount -t lustre /dev/md0 /mnt/ost1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;h3&gt;&lt;a name=&quot;MounttheLustrefs.&quot;&gt;&lt;/a&gt;Mount the Lustre fs.&lt;/h3&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# mount -t lustre 10.0.0.1@tcp0:/temp /mnt/lustre
...
# mount
/dev/sda1 on / type ext3 (rw)
proc on /proc type proc (rw)
sysfs on /sys type sysfs (rw)
devpts on /dev/pts type devpts (rw,gid=5,mode=620)
tmpfs on /dev/shm type tmpfs (rw)
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
sunrpc on /&lt;span class=&quot;code-keyword&quot;&gt;var&lt;/span&gt;/lib/nfs/rpc_pipefs type rpc_pipefs (rw)
/dev/sdb11 on /mnt/mdt type lustre (rw)
/dev/md0 on /mnt/ost1 type lustre (rw)
10.0.0.1@tcp0:/temp on /mnt/lustre type lustre (rw)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;h3&gt;&lt;a name=&quot;Turnoffstripesizeetc.&quot;&gt;&lt;/a&gt;Shrink the stripe cache and turn off the OST caches.&lt;/h3&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# echo 32 &amp;gt; /sys/block/md0/md/stripe_cache_size
# echo 0 &amp;gt; /proc/fs/lustre/obdfilter/temp-OST0000/writethrough_cache_enable
# echo 0 &amp;gt; /proc/fs/lustre/obdfilter/temp-OST0000/read_cache_enable
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;h3&gt;&lt;a name=&quot;CopyfileontoLustre%2Cfaildriveandcopyoff.&quot;&gt;&lt;/a&gt;Copy file onto Lustre, fail drive and copy off.&lt;/h3&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/dev/urandom of=/root/test.1 oflag=direct bs=128k count=8
8+0 records in
8+0 records out
1048576 bytes (1.0 MB) copied, 0.230175 seconds, 4.6 MB/s

# md5sum test.1
d02213ae420e043d42688874a93c7e1b  test.1

# dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/root/test.1 of=/mnt/lustre/test.1 oflag=direct bs=128k
8+0 records in
8+0 records out
1048576 bytes (1.0 MB) copied, 0.080452 seconds, 13.0 MB/s

# mdadm /dev/md0 --fail /dev/sdb7
mdadm: set /dev/sdb7 faulty in /dev/md0

# dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/mnt/lustre/test.1 iflag=direct of=/root/test.2 oflag=direct bs=128k
8+0 records in
8+0 records out
1048576 bytes (1.0 MB) copied, 0.0758 seconds, 13.8 MB/s

# md5sum test.1 test.2
bf4d5039cb2c7acd744d119a262bc90b  test.1
bf4d5039cb2c7acd744d119a262bc90b  test.2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="17624" author="ericm" created="Tue, 12 Jul 2011 19:11:57 +0000"  >&lt;p&gt;Richard, I don&apos;t have the env to test it, and I&apos;ll be away for two weeks from tomorrow, sorry I couldn&apos;t be more helpful... I just read your steps again, it seems all correct to me. So please get zerocopy write working on RHEL6 as the first step.&lt;/p&gt;</comment>
                            <comment id="17638" author="rhenwood" created="Wed, 13 Jul 2011 13:55:16 +0000"  >&lt;p&gt;UPDATE: &lt;/p&gt;

&lt;p&gt;There currently is no zero-copy patch for RHEL6 in the Lustre source. As a result, this bug should not be reproducible on RHEL6.&lt;/p&gt;

&lt;p&gt;There is a zero-copy patch for RHEL5 in the Lustre source. I have been unable to reliably generate zero-copy writes by writing after a drive has failed. However, I am still unable to observe data corruption with RHEL5.&lt;/p&gt;

&lt;p&gt;Because the zero copy patch is not available for RHEL6, I recommend this issue be CLOSED: Can&apos;t reproduce. &lt;br/&gt;
A new Jira issue can be created for the RHEL6 zero-copy patch.&lt;/p&gt;</comment>
                            <comment id="17654" author="pjones" created="Wed, 13 Jul 2011 17:46:06 +0000"  >&lt;p&gt;Dropping priority so this is no longer a blocker. If there is any evidence that this affects the master code on either RHEL5 or RHEL6 then it can raise in priority again.&lt;/p&gt;</comment>
                            <comment id="17655" author="rhenwood" created="Wed, 13 Jul 2011 18:18:55 +0000"  >&lt;p&gt;Apologies, my previous comment contained an inaccuracy:&lt;/p&gt;

&lt;p&gt;I have been &lt;em&gt;able&lt;/em&gt; to reliably generate zero-copy writes by writing after a drive has failed. However, I am still unable to observe data corruption with RHEL5.&lt;/p&gt;</comment>
                            <comment id="18237" author="pjones" created="Tue, 26 Jul 2011 13:03:35 +0000"  >&lt;p&gt;As I understand it, the bug in the zero copy patch has been fixed in the version contributed under LU535&lt;/p&gt;</comment>
                            <comment id="48113" author="nrutman" created="Tue, 20 Nov 2012 14:10:03 +0000"  >&lt;p&gt;Xyratex: &lt;a href=&quot;http://jira-nss.xy01.xyratex.com:8080/browse/MRP-158&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;MRP-158&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="10291" name="01.fix_uptodate_flag.patch" size="918" author="shadow" created="Mon, 27 Jun 2011 14:52:57 +0000"/>
                            <attachment id="10290" name="02.switch_page.patch" size="3267" author="shadow" created="Mon, 27 Jun 2011 14:52:57 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvi5z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6592</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>