<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:22:57 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2170] osc_extent_merge()) ASSERTION( cur-&gt;oe_osclock == victim-&gt;oe_osclock) while running racer</title>
                <link>https://jira.whamcloud.com/browse/LU-2170</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Hit this assertion running racer in a loop in a single node setup:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 8788.630974] LustreError: 4352:0:(vvp_io.c:1038:vvp_io_commit_write()) Skipped 833 previous similar messages
[ 8834.780249] LustreError: 22605:0:(osc_cache.c:506:osc_extent_merge()) ASSERTION( cur-&amp;gt;oe_osclock == victim-&amp;gt;oe_osclock ) failed: 
[ 8834.780830] LustreError: 22605:0:(osc_cache.c:506:osc_extent_merge()) LBUG
[ 8834.781139] Pid: 22605, comm: cat
[ 8834.781364] 
[ 8834.781364] Call Trace:
[ 8834.781773]  [&amp;lt;ffffffffa0c9e915&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
[ 8834.782075]  [&amp;lt;ffffffffa0c9ef27&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
[ 8834.782366]  [&amp;lt;ffffffffa04ce847&amp;gt;] osc_extent_merge+0x697/0x6a0 [osc]
[ 8834.782657]  [&amp;lt;ffffffff81112df1&amp;gt;] ? generic_file_buffered_write+0x1f1/0x300
[ 8834.782947]  [&amp;lt;ffffffff8106fea7&amp;gt;] ? current_fs_time+0x27/0x30
[ 8834.783209]  [&amp;lt;ffffffff81051f73&amp;gt;] ? __wake_up+0x53/0x70
[ 8834.783474]  [&amp;lt;ffffffffa04d601c&amp;gt;] osc_extent_release+0xfc/0x3e0 [osc]
[ 8834.783782]  [&amp;lt;ffffffffa059a610&amp;gt;] ? lov_io_end_wrapper+0x0/0x100 [lov]
[ 8834.784070]  [&amp;lt;ffffffffa04c7ddf&amp;gt;] osc_io_end+0x1f/0x30 [osc]
[ 8834.784371]  [&amp;lt;ffffffffa11dd7a0&amp;gt;] cl_io_end+0x60/0x150 [obdclass]
[ 8834.784661]  [&amp;lt;ffffffffa059a701&amp;gt;] lov_io_end_wrapper+0xf1/0x100 [lov]
[ 8834.785159]  [&amp;lt;ffffffffa059a1ae&amp;gt;] lov_io_call+0x8e/0x130 [lov]
[ 8834.785465]  [&amp;lt;ffffffffa059bdec&amp;gt;] lov_io_end+0x4c/0x110 [lov]
[ 8834.785758]  [&amp;lt;ffffffffa11dd7a0&amp;gt;] cl_io_end+0x60/0x150 [obdclass]
[ 8834.786013]  [&amp;lt;ffffffffa11e28d2&amp;gt;] cl_io_loop+0xc2/0x1b0 [obdclass]
[ 8834.786353]  [&amp;lt;ffffffffa0a7822b&amp;gt;] ll_file_io_generic+0x42b/0x550 [lustre]
[ 8834.786644]  [&amp;lt;ffffffffa0a7911c&amp;gt;] ll_file_aio_write+0x13c/0x2c0 [lustre]
[ 8834.811295] Call Trace:
[ 8834.811521]  [&amp;lt;ffffffff814f75e4&amp;gt;] ? panic+0xa0/0x168
[ 8834.811788]  [&amp;lt;ffffffffa0c9ef7b&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
[ 8834.812079]  [&amp;lt;ffffffffa04ce847&amp;gt;] ? osc_extent_merge+0x697/0x6a0 [osc]
[ 8834.812364]  [&amp;lt;ffffffff81112df1&amp;gt;] ? generic_file_buffered_write+0x1f1/0x300
[ 8834.812605]  [&amp;lt;ffffffff8106fea7&amp;gt;] ? current_fs_time+0x27/0x30
[ 8834.812928]  [&amp;lt;ffffffff81051f73&amp;gt;] ? __wake_up+0x53/0x70
[ 8834.813301]  [&amp;lt;ffffffffa04d601c&amp;gt;] ? osc_extent_release+0xfc/0x3e0 [osc]
[ 8834.813615]  [&amp;lt;ffffffffa059a610&amp;gt;] ? lov_io_end_wrapper+0x0/0x100 [lov]
[ 8834.813903]  [&amp;lt;ffffffffa04c7ddf&amp;gt;] ? osc_io_end+0x1f/0x30 [osc]
[ 8834.814164]  [&amp;lt;ffffffffa11dd7a0&amp;gt;] ? cl_io_end+0x60/0x150 [obdclass]
[ 8834.816218]  [&amp;lt;ffffffffa059a701&amp;gt;] ? lov_io_end_wrapper+0xf1/0x100 [lov]
[ 8834.816513]  [&amp;lt;ffffffffa059a1ae&amp;gt;] ? lov_io_call+0x8e/0x130 [lov]
[ 8834.816814]  [&amp;lt;ffffffffa059bdec&amp;gt;] ? lov_io_end+0x4c/0x110 [lov]
[ 8834.817184]  [&amp;lt;ffffffffa11dd7a0&amp;gt;] ? cl_io_end+0x60/0x150 [obdclass]
[ 8834.817462]  [&amp;lt;ffffffffa11e28d2&amp;gt;] ? cl_io_loop+0xc2/0x1b0 [obdclass]
[ 8834.817769]  [&amp;lt;ffffffffa0a7822b&amp;gt;] ? ll_file_io_generic+0x42b/0x550 [lustre]
[ 8834.818078]  [&amp;lt;ffffffffa0a7911c&amp;gt;] ? ll_file_aio_write+0x13c/0x2c0 [lustre]
[ 8834.818385]  [&amp;lt;ffffffffa0a79409&amp;gt;] ? ll_file_write+0x169/0x2a0 [lustre]
[ 8834.818676]  [&amp;lt;ffffffff8117b2e8&amp;gt;] ? vfs_write+0xb8/0x1a0
[ 8834.818903]  [&amp;lt;ffffffff810d5192&amp;gt;] ? audit_syscall_entry+0x272/0x2a0
[ 8834.819185]  [&amp;lt;ffffffff8117bbb1&amp;gt;] ? sys_write+0x51/0x90
[ 8834.819425]  [&amp;lt;ffffffff8100b0f2&amp;gt;] ? system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I have a crashdump for this one&lt;/p&gt;</description>
                <environment></environment>
        <key id="16353">LU-2170</key>
            <summary>osc_extent_merge()) ASSERTION( cur-&gt;oe_osclock == victim-&gt;oe_osclock) while running racer</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="jay">Jinshan Xiong</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                    </labels>
                <created>Sat, 13 Oct 2012 18:20:09 +0000</created>
                <updated>Mon, 17 Jul 2017 18:48:40 +0000</updated>
                            <resolved>Tue, 13 Nov 2012 13:53:56 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                    <fixVersion>Lustre 2.1.4</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="46814" author="jay" created="Sat, 20 Oct 2012 20:15:24 +0000"  >&lt;p&gt;patch is at: &lt;a href=&quot;http://review.whamcloud.com/4316&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4316&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="47244" author="spitzcor" created="Thu, 1 Nov 2012 10:06:40 +0000"  >&lt;p&gt;The patch has been effective at eliminating our reproducer.&lt;/p&gt;

&lt;p&gt;BTW, here are the details of the reproducer from our test engineer:&lt;/p&gt;

&lt;p&gt;fsx_mpi - fsx (file system exerciser). This test is open source and part of LTP.&lt;/p&gt;

&lt;p&gt;iozone - file system benchmark, freely available&lt;/p&gt;

&lt;p&gt;growfiles_mpi - growfiles extends and truncates a file or files. &lt;br/&gt;
           growfiles can do data validation.  We have several&lt;br/&gt;
           versions of growfiles(pre/post the Cray/SGI split), but I &lt;br/&gt;
           think the one used here was the LTP version with the &lt;br/&gt;
           addition of MPI to use unique file names if desired. &lt;/p&gt;

&lt;p&gt;doio_mpi - doio_mpi uses the following system calls to do the I/O: &lt;br/&gt;
           read, write, readv, writev, mmread, mmwrite, fsync2,&lt;br/&gt;
           fdatasync&lt;/p&gt;

&lt;p&gt;           The program locks the regions of the file that are being&lt;br/&gt;
           operated on, to prevent overlapping requests from &lt;br/&gt;
           clobbering each other.  This test does data checking. &lt;br/&gt;
           doio_mpi was based on iogen/doio which are part of LTP, &lt;br/&gt;
           but it was a large re-write to add the MPI functionality. &lt;/p&gt;

&lt;p&gt;mmstress - This is a test program that performs general stress with&lt;br/&gt;
           memory race conditions. It contains seven testcases that&lt;br/&gt;
           will test race conditions between simultaneous read fault,&lt;br/&gt;
           write fault, copy on write (COW) fault, etc. It targets the Linux operating system and&lt;br/&gt;
           can be easily ported to work on other operating systems &lt;br/&gt;
           as well.&lt;br/&gt;
           Part of LTP.  &lt;/p&gt;

&lt;p&gt;For the commands I was using, &apos;ubrun&apos; is a tool that can do a little &lt;br/&gt;
setup/cleanup, and identifies which binaries to use when running on&lt;br/&gt;
different systems.  &apos;aptrun&apos; is our application launching tool; it&lt;br/&gt;
determines the batch scheduler used if any, creates a job input&lt;br/&gt;
file(if needed), then launches the job.&lt;/p&gt;

&lt;p&gt;I used a tool, FSLOAD, that randomly picks a command from the pool &lt;br/&gt;
of commands, and runs them on 5%-10% of the PEs on the system.  I&lt;br/&gt;
typically had 5-10 jobs running at any time until the problem hit.&lt;/p&gt;

&lt;p&gt;The commands I was using: &lt;br/&gt;
ubrun -t -A &quot;rm ./fsx*&quot; -e LTPROOT_CL -s &quot;All operations completed -OK!&quot; aptrun -n 4 LTPROOT_CL=testcases/bin/fsx-linux_mpi -q -WR -N 30000 ./fsx1.$$&lt;/p&gt;

&lt;p&gt;ubrun -t -e APPS_CL -s &quot;iozone test complete&quot; aptrun -n 1 -M 1 APPS_CL=iozone/src/current/RUN/iozone -A2 -f ./ioz1.$$&lt;/p&gt;

&lt;p&gt;ubrun -t -e OSTEST_CL -s &quot;Hit time value&quot; aptrun -n 4 OSTEST_CL=bin/growfiles_mpi -b -e 1 -i 0 -u -L 300 -d .&lt;/p&gt;

&lt;p&gt;ubrun -t -e LTPROOT_CL -o aptrun -n 4  LTPROOT_CL=testcases/bin/doio_mpi -i 50000 10000b:./do1&lt;/p&gt;

&lt;p&gt;ubrun -s &quot;Hit time value of&quot;  -x -e OSTEST_CL -o -T gf1 -t -D aptrun -n 8 OSTEST_CL=bin/growfiles_mpi -b -i 0 -L 300 -g 4090 -T 100 -t 408990 -C 10 -c 1000 -d dir1 -S 200&lt;/p&gt;

&lt;p&gt;ubrun -s &quot;Hit time value of&quot;  -x -e OSTEST_CL -o -T gf5 -t -D aptrun -n 8 OSTEST_CL=bin/growfiles_mpi -b -r 1-5000 -R 0--1 -i 0 -L 300 -C 1 -S 2&lt;/p&gt;

&lt;p&gt;ubrun -t -D -o -T CL_LTPmtest05 -e LTPROOT_CL -p. -B &quot;cp $LTPROOT_CL/testcases/bin/dummy .&quot; aptrun -n 1 LTPROOT_CL=testcases/bin/mmstress&lt;/p&gt;

&lt;p&gt;ubrun -t -D -o -T CL_LTPmtest05 -e LTPROOT_CL -p. -B &quot;cp $LTPROOT_CL/testcases/bin/dummy .&quot; aptrun -n 1 LTPROOT_CL=testcases/bin/mmstress&lt;/p&gt;</comment>
                            <comment id="47384" author="jay" created="Mon, 5 Nov 2012 02:05:35 +0000"  >&lt;p&gt;Cory, thank you so much for the reproducer info.&lt;/p&gt;</comment>
                            <comment id="47736" author="jay" created="Tue, 13 Nov 2012 13:53:56 +0000"  >&lt;p&gt;patch has been landed to master&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvaaf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5202</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>