<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:29:17 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2910] racer test_1, sanity test_54c: ASSERTION( cio-&gt;cui_tot_nrsegs &gt;= cio-&gt;cui_nrsegs ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-2910</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for sarah &amp;lt;sarah@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/3fa2a912-85b2-11e2-9f8d-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/3fa2a912-85b2-11e2-9f8d-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_1 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;client console shows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;18:47:35:Lustre: DEBUG MARKER: == racer test 1: racer on clients: client-22-ib,client-23-ib.lab.whamcloud.com DURATION=900 == 18:47:28 (1362451648)
18:47:35:Lustre: DEBUG MARKER: DURATION=900 MDSCOUNT=1 				   /usr/lib64/lustre/tests/racer/racer.sh /mnt/lustre2/racer 
18:47:35:Lustre: DEBUG MARKER: DURATION=900 MDSCOUNT=1 				   /usr/lib64/lustre/tests/racer/racer.sh /mnt/lustre/racer 
18:51:19:LustreError: 8649:0:(file.c:2601:ll_inode_revalidate_fini()) lustre: revalidate FID [0x200000bd0:0x9095:0x0] error: rc = -116
18:56:24:LustreError: 28058:0:(lcommon_cl.c:821:ccc_io_advance()) ASSERTION( cio-&amp;gt;cui_tot_nrsegs &amp;gt;= cio-&amp;gt;cui_nrsegs ) failed: 
18:56:24:LustreError: 28058:0:(lcommon_cl.c:821:ccc_io_advance()) LBUG
18:56:24:Pid: 28058, comm: cat
18:56:24:
18:56:24:Call Trace:
18:56:24: [&amp;lt;ffffffffa03fe895&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
18:56:24: [&amp;lt;ffffffffa03fee97&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
18:56:24: [&amp;lt;ffffffffa1264c0e&amp;gt;] ccc_io_advance+0x11e/0x190 [lustre]
18:56:24: [&amp;lt;ffffffffa05a96d6&amp;gt;] cl_io_rw_advance+0x66/0x150 [obdclass]
18:56:24: [&amp;lt;ffffffffa05ae602&amp;gt;] cl_io_loop+0xe2/0x1b0 [obdclass]
18:56:24: [&amp;lt;ffffffffa1215323&amp;gt;] ll_file_io_generic+0x223/0x570 [lustre]
18:56:24: [&amp;lt;ffffffffa12157af&amp;gt;] ll_file_aio_read+0x13f/0x2c0 [lustre]
18:56:25: [&amp;lt;ffffffffa121635c&amp;gt;] ll_file_read+0x16c/0x2a0 [lustre]
18:56:25: [&amp;lt;ffffffff81176cb5&amp;gt;] vfs_read+0xb5/0x1a0
18:56:25: [&amp;lt;ffffffff81176df1&amp;gt;] sys_read+0x51/0x90
18:56:25: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
18:56:25:
18:56:25:Kernel panic - not syncing: LBUG
18:56:25:Pid: 28058, comm: cat Not tainted 2.6.32-279.19.1.el6.x86_64 #1
18:56:25:Call Trace:
18:56:25: [&amp;lt;ffffffff814e9541&amp;gt;] ? panic+0xa0/0x168
18:56:25: [&amp;lt;ffffffffa03feeeb&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
18:56:25: [&amp;lt;ffffffffa1264c0e&amp;gt;] ? ccc_io_advance+0x11e/0x190 [lustre]
18:56:25: [&amp;lt;ffffffffa05a96d6&amp;gt;] ? cl_io_rw_advance+0x66/0x150 [obdclass]
18:56:25: [&amp;lt;ffffffffa05ae602&amp;gt;] ? cl_io_loop+0xe2/0x1b0 [obdclass]
18:56:25: [&amp;lt;ffffffffa1215323&amp;gt;] ? ll_file_io_generic+0x223/0x570 [lustre]
18:56:25: [&amp;lt;ffffffffa12157af&amp;gt;] ? ll_file_aio_read+0x13f/0x2c0 [lustre]
18:56:25: [&amp;lt;ffffffffa121635c&amp;gt;] ? ll_file_read+0x16c/0x2a0 [lustre]
18:56:25: [&amp;lt;ffffffff81176cb5&amp;gt;] ? vfs_read+0xb5/0x1a0
18:56:25: [&amp;lt;ffffffff81176df1&amp;gt;] ? sys_read+0x51/0x90
18:56:26: [&amp;lt;ffffffff8100b072&amp;gt;] ? system_call_fastpath+0x16/0x1b
18:56:26:Initializing cgroup subsys cpuset
18:56:26:Initializing cgroup subsys cpu
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="17765">LU-2910</key>
            <summary>racer test_1, sanity test_54c: ASSERTION( cio-&gt;cui_tot_nrsegs &gt;= cio-&gt;cui_nrsegs ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="niu">Niu Yawei</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>LB</label>
                    </labels>
                <created>Tue, 5 Mar 2013 14:31:56 +0000</created>
                <updated>Mon, 18 Mar 2013 23:20:36 +0000</updated>
                            <resolved>Mon, 18 Mar 2013 23:20:36 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>10</watches>
                                                                            <comments>
                            <comment id="53521" author="niu" created="Thu, 7 Mar 2013 05:29:54 +0000"  >&lt;p&gt;The client debug log in the above maloo link is unfortunately missing. Sarah, could you try to reproduce it and collect the client debug log (with D_VFSTRACE enabled)? Thanks in advance.&lt;/p&gt;</comment>
                            <comment id="53575" author="niu" created="Fri, 8 Mar 2013 00:05:30 +0000"  >&lt;p&gt;Looks the LASSERT could be triggered when we restart io with not restored iovector: &lt;a href=&quot;http://review.whamcloud.com/5652&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5652&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="53579" author="jay" created="Fri, 8 Mar 2013 00:43:57 +0000"  >&lt;p&gt;is file_swap.sh enabled in this run?&lt;/p&gt;</comment>
                            <comment id="53584" author="niu" created="Fri, 8 Mar 2013 02:05:54 +0000"  >&lt;p&gt;Xiong, I think the file_swap.sh should be disabled by default. Sarah knows it better.&lt;/p&gt;</comment>
                            <comment id="53601" author="jay" created="Fri, 8 Mar 2013 11:05:56 +0000"  >&lt;p&gt;I&apos;m thinking in which situation it would make IO restart. Also it may be incorrect to restart whole IO in some circumstances.&lt;/p&gt;</comment>
                            <comment id="53602" author="jay" created="Fri, 8 Mar 2013 11:07:30 +0000"  >&lt;p&gt;Now that the log is missed, I&apos;d like to wait for a while to see if this issue can be reproduced. I&apos;d like to make it clear before making a fix.&lt;/p&gt;</comment>
                            <comment id="53665" author="niu" created="Sun, 10 Mar 2013 23:47:10 +0000"  >&lt;p&gt;Xiong, in the racer test, file size could be changed anytime (mostly from rename, I think), so it&apos;s possible trigger io restart when ccc_prep_size() thinking that the ppos is beyond of file size, isn&apos;t it?&lt;/p&gt;

&lt;p&gt;From the current code, seems we support only the whole io restart when there isn&apos;t any bit of read/write done yet, we don&apos;t support io restart when partial io done, right?&lt;/p&gt;</comment>
                            <comment id="53705" author="sarah" created="Mon, 11 Mar 2013 13:05:41 +0000"  >&lt;p&gt;I ran this test on Friday 4 times and cannot reproduce it on my test nodes. BTW Xiong, I cannot find file_swap.sh under tests/racer, could you please point me out? Thanks.&lt;/p&gt;</comment>
                            <comment id="53712" author="jay" created="Mon, 11 Mar 2013 13:56:55 +0000"  >&lt;p&gt;Are you in master tree? Try to run the following command and see what you get:&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;jinxiong@intel lustre&amp;#93;&lt;/span&gt;$ git annotate tests/racer/file_swap.sh &lt;br/&gt;
4af3ab19        (Jinshan Xiong  2013-02-01 10:33:09 -0800       1)#!/bin/bash&lt;br/&gt;
4af3ab19        (Jinshan Xiong  2013-02-01 10:33:09 -0800       2)&lt;br/&gt;
4af3ab19        (Jinshan Xiong  2013-02-01 10:33:09 -0800       3)DIR=$1&lt;br/&gt;
4af3ab19        (Jinshan Xiong  2013-02-01 10:33:09 -0800       4)MAX=$2&lt;br/&gt;
4af3ab19        (Jinshan Xiong  2013-02-01 10:33:09 -0800       5)&lt;br/&gt;
4af3ab19        (Jinshan Xiong  2013-02-01 10:33:09 -0800       6)while : ; do&lt;/p&gt;</comment>
                            <comment id="53716" author="sarah" created="Mon, 11 Mar 2013 14:34:46 +0000"  >&lt;p&gt;Xiong, have you added the file into Makefile.am? If no, then it will not be built into the rpms&lt;/p&gt;</comment>
                            <comment id="53717" author="jay" created="Mon, 11 Mar 2013 14:37:44 +0000"  >&lt;p&gt;ah no, i didn&apos;t&lt;/p&gt;</comment>
                            <comment id="53725" author="jlevi" created="Mon, 11 Mar 2013 15:28:06 +0000"  >&lt;p&gt;Can this be reduced from a blocker since it doesn&apos;t sound like it can be reproduced?&lt;/p&gt;</comment>
                            <comment id="53739" author="sarah" created="Mon, 11 Mar 2013 20:14:14 +0000"  >&lt;p&gt;Didn&apos;t hit it in the latest tag testing, I think this can be dropped to major&lt;/p&gt;</comment>
                            <comment id="53755" author="niu" created="Tue, 12 Mar 2013 00:08:52 +0000"  >&lt;p&gt;Though it&apos;s hard to reproduce, from the code we can see that if the io is restarted, the iov_len could be messed up and this LASSERT will be triggered at the end.&lt;/p&gt;

&lt;p&gt;Xiong, if you think layout change in racer test is unexpected, then there could be something serious wrong in the layout code, because I can see lots of io restart in my local testing (without file_swap, most io restart come from vvp_io_fini(), some from can_populate_pages()). Is it possible caused by rename?&lt;/p&gt;</comment>
                            <comment id="53800" author="jay" created="Tue, 12 Mar 2013 13:55:49 +0000"  >&lt;p&gt;Niu, you&apos;re right about this. There exists fake layout change after layout swapping is introduced, and rename can cause this because it revokes full ibits lock.&lt;/p&gt;</comment>
                            <comment id="54089" author="niu" created="Fri, 15 Mar 2013 03:36:40 +0000"  >&lt;p&gt;patch landed for 2.4&lt;/p&gt;</comment>
                            <comment id="54157" author="jhammond" created="Fri, 15 Mar 2013 20:12:05 +0000"  >&lt;p&gt;I&apos;m hitting this reliably using 2.3.62-47-g3a0bb68 (which includes the patch here) on my home setup.  The easiest way to reproduce for me is to run sanity 54c in a loop.  Maybe this should be reopened.&lt;/p&gt;</comment>
                            <comment id="54226" author="niu" created="Mon, 18 Mar 2013 03:08:28 +0000"  >&lt;p&gt;What I saw is another LASSERT which is newly added from the patch:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;LustreError: 31446:0:(lcommon_cl.c:794:ccc_io_update_iov()) ASSERTION( cio-&amp;gt;cui_tot_nrsegs &amp;gt;= cio-&amp;gt;cui_nrsegs ) failed: tot_nrsegs: 0, nrsegs: 1
LustreError: 31446:0:(lcommon_cl.c:794:ccc_io_update_iov()) LBUG
Kernel panic - not syncing: LBUG
Pid: 31446, comm: loop3 Tainted: P           ---------------    2.6.32 #1
Call Trace:
 [&amp;lt;ffffffff814fe08e&amp;gt;] ? panic+0xa0/0x168
 [&amp;lt;ffffffffa10b3f5b&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
 [&amp;lt;ffffffffa0f6e569&amp;gt;] ? ccc_io_update_iov+0xe9/0xf0 [lustre]
 [&amp;lt;ffffffffa0f77d8c&amp;gt;] ? vvp_io_rw_lock+0x8c/0x690 [lustre]
 [&amp;lt;ffffffffa10c4301&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffffa0f783c5&amp;gt;] ? vvp_io_write_lock+0x35/0x40 [lustre]
 [&amp;lt;ffffffffa06e03d3&amp;gt;] ? cl_io_lock+0x63/0x560 [obdclass]
 [&amp;lt;ffffffffa10bedf8&amp;gt;] ? libcfs_log_return+0x28/0x40 [libcfs]
 [&amp;lt;ffffffffa0f4a2bf&amp;gt;] ? ll_cl_init+0x38f/0x570 [lustre]
 [&amp;lt;ffffffffa0f4a6e3&amp;gt;] ? ll_prepare_write+0x53/0x1a0 [lustre]
 [&amp;lt;ffffffffa0f62c0e&amp;gt;] ? ll_write_begin+0x7e/0x1a0 [lustre]
 [&amp;lt;ffffffff8111346c&amp;gt;] ? pagecache_write_begin+0x1c/0x20
 [&amp;lt;ffffffff8135ef05&amp;gt;] ? do_lo_send_aops+0x155/0x1a0
 [&amp;lt;ffffffff8135f15b&amp;gt;] ? do_bio_filebacked+0x20b/0x2d0
 [&amp;lt;ffffffff8135edb0&amp;gt;] ? do_lo_send_aops+0x0/0x1a0
 [&amp;lt;ffffffff8135f2f1&amp;gt;] ? loop_thread+0xd1/0x270
 [&amp;lt;ffffffff81092180&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffff8135f220&amp;gt;] ? loop_thread+0x0/0x270
 [&amp;lt;ffffffff81091e16&amp;gt;] ? kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c14a&amp;gt;] ? child_rip+0xa/0x20
 [&amp;lt;ffffffff81091d80&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Actually, sometimes the tot_nrsegs could be zero (see ll_cl_init()), the newly added assert isn&apos;t correct in such case.&lt;/p&gt;</comment>
                            <comment id="54227" author="niu" created="Mon, 18 Mar 2013 03:12:50 +0000"  >&lt;p&gt;Skip iov update when tot_nrsegs is zero: &lt;a href=&quot;http://review.whamcloud.com/5747&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5747&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="54234" author="adilger" created="Mon, 18 Mar 2013 07:13:37 +0000"  >&lt;p&gt;Increasing priority, since this is causing almost every test to fail.&lt;/p&gt;</comment>
                            <comment id="54255" author="mdiep" created="Mon, 18 Mar 2013 15:29:01 +0000"  >&lt;p&gt;hit this &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/7e10a3f6-8e87-11e2-a173-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/7e10a3f6-8e87-11e2-a173-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="54309" author="adilger" created="Mon, 18 Mar 2013 21:36:22 +0000"  >&lt;p&gt;Jinshan replied to my comment on Patch Set 1:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&quot;Failed tests always show &quot;tot_nrsegs: 0&quot; in the LASSERT() so I can imagine this will fix the problem.&quot;&lt;/p&gt;

&lt;p&gt;The root cause of this problem is lloop write&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Is there something that should be fixed in the lloop code?  There has been more interest in this recently from Robert and Richard for VM provisioning and other cloud applications.&lt;/p&gt;</comment>
                            <comment id="54310" author="jay" created="Mon, 18 Mar 2013 22:02:32 +0000"  >&lt;blockquote&gt;
&lt;p&gt;Is there something that should be fixed in the lloop code? There has been more interest in this recently from Robert and Richard for VM provisioning and other cloud applications.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Did I say lustre loop device? I meant to say linux kernel loop device, sorry.&lt;/p&gt;

&lt;p&gt;First of all, we should figure out what kind of IO has been received by lloop device then we can work out corresponding fix. In current implementation of lloop, we use direct IO to serve bio requests so an obvious way to improve performance is to do cache write - especially if we find out there are plenty of small IOs.&lt;/p&gt;</comment>
                            <comment id="54321" author="pjones" created="Mon, 18 Mar 2013 23:20:36 +0000"  >&lt;p&gt;Landed for 2.4&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvk7r:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7001</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>