<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:28:41 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2843] ASSERTION( last_rec-&gt;lrh_index == tail-&gt;lrt_index )</title>
                <link>https://jira.whamcloud.com/browse/LU-2843</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Steps leading up to:&lt;/p&gt;

&lt;ol&gt;
	&lt;li&gt;create new filesystem&lt;/li&gt;
	&lt;li&gt;register changelog user&lt;/li&gt;
	&lt;li&gt;run createmany and unlinkmany in infinite loop until catalog full (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1586&quot; title=&quot;no free catalog slots for log&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1586&quot;&gt;&lt;del&gt;LU-1586&lt;/del&gt;&lt;/a&gt;)&lt;/li&gt;
	&lt;li&gt;lctl --device lustre-MDT0000 changelog_deregister  cl1&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;Build contains patches for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2129&quot; title=&quot;ASSERTION( last_rec-&amp;gt;lrh_index == tail-&amp;gt;lrt_index )&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2129&quot;&gt;&lt;del&gt;LU-2129&lt;/del&gt;&lt;/a&gt; and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2102&quot; title=&quot;changelog_user_init_cb())  ASSERTION( rec-&amp;gt;cur_hdr.lrh_type == CHANGELOG_USER_REC ) &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2102&quot;&gt;&lt;del&gt;LU-2102&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LustreError: 30248:0:(llog_osd.c:596:llog_osd_next_block()) ASSERTION( last_rec-&amp;gt;lrh_index == tail-&amp;gt;lrt_index ) failed: 
LustreError: 30248:0:(llog_osd.c:596:llog_osd_next_block()) LBUG
Pid: 30248, comm: lctl
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;PID: 30248  TASK: ffff88004bd7e080  CPU: 3   COMMAND: &quot;lctl&quot;
 #0 [ffff88005029b590] machine_kexec at ffffffff8103283b
 #1 [ffff88005029b5f0] crash_kexec at ffffffff810ba492
 #2 [ffff88005029b6c0] panic at ffffffff814fdcf2
 #3 [ffff88005029b740] lbug_with_loc at ffffffffa0fc7fbb [libcfs]
 #4 [ffff88005029b760] llog_osd_next_block at ffffffffa06ba3ed [obdclass]
 #5 [ffff88005029b800] llog_process_thread at ffffffffa068a7f6 [obdclass]
 #6 [ffff88005029b8b0] llog_process_or_fork at ffffffffa068cb0d [obdclass]
 #7 [ffff88005029b900] llog_cat_process_cb at ffffffffa068f282 [obdclass]
 #8 [ffff88005029b950] llog_process_thread at ffffffffa068adfb [obdclass]
 #9 [ffff88005029ba00] llog_process_or_fork at ffffffffa068cb0d [obdclass]
#10 [ffff88005029ba50] llog_cat_process_or_fork at ffffffffa068d5b9 [obdclass]
#11 [ffff88005029bae0] llog_cat_process at ffffffffa068d7c9 [obdclass]
#12 [ffff88005029bb00] llog_changelog_cancel at ffffffffa0c730ff [mdd]
#13 [ffff88005029bb40] llog_cancel at ffffffffa0691538 [obdclass]
#14 [ffff88005029bb80] mdd_changelog_llog_cancel at ffffffffa0c77dae [mdd]
#15 [ffff88005029bbc0] mdd_changelog_user_purge at ffffffffa0c78220 [mdd]
#16 [ffff88005029bc10] mdd_iocontrol at ffffffffa0c78776 [mdd]
#17 [ffff88005029bc70] mdt_ioc_child at ffffffffa0d50129 [mdt]
#18 [ffff88005029bcf0] mdt_iocontrol at ffffffffa0d54608 [mdt]
#19 [ffff88005029bd90] class_handle_ioctl at ffffffffa069ba4f [obdclass]
#20 [ffff88005029be40] obd_class_ioctl at ffffffffa06852ab [obdclass]
#21 [ffff88005029be60] vfs_ioctl at ffffffff8118ddb2
#22 [ffff88005029bea0] do_vfs_ioctl at ffffffff8118df54
#23 [ffff88005029bf30] sys_ioctl at ffffffff8118e4d1
#24 [ffff88005029bf80] system_call_fastpath at ffffffff8100b0f2
    RIP: 0000003ae74dde07  RSP: 00007fff6c48fc00  RFLAGS: 00010246
    RAX: 0000000000000010  RBX: ffffffff8100b0f2  RCX: 0000000000000000
    RDX: 00007fff6c48fc40  RSI: 00000000424066b2  RDI: 0000000000000003
    RBP: 0000000000000001   R8: 0000000000000000   R9: 0000000000000240
    R10: 0000000000000000  R11: 0000000000000246  R12: 00000000424066b2
    R13: 00007fff6c48fc40  R14: 0000000000678380  R15: 0000000000000003
    ORIG_RAX: 0000000000000010  CS: 0033  SS: 002b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>2.6.32-279.9.1.1chaos.ch5.1.x86_64&lt;br/&gt;
KVM x86_64 virtual machine</environment>
        <key id="17646">LU-2843</key>
            <summary>ASSERTION( last_rec-&gt;lrh_index == tail-&gt;lrt_index )</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="hongchao.zhang">Hongchao Zhang</assignee>
                                    <reporter username="nedbass">Ned Bass</reporter>
                        <labels>
                            <label>MB</label>
                    </labels>
                <created>Wed, 20 Feb 2013 17:42:27 +0000</created>
                <updated>Wed, 3 Apr 2013 16:50:28 +0000</updated>
                            <resolved>Wed, 3 Apr 2013 16:50:28 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="52841" author="pjones" created="Thu, 21 Feb 2013 18:09:28 +0000"  >&lt;p&gt;Hongchao&lt;/p&gt;

&lt;p&gt;Could you please look into this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="52923" author="tappro" created="Sat, 23 Feb 2013 06:31:28 +0000"  >&lt;p&gt;It makes sense to add debug into llog_osd_write_rec() to see how log records were written upon llog ends. There are several separated writes and maybe we have situation when part of data is written and other is not. Assertion in llog_osd_next_block() is just result of failed write most probably&lt;/p&gt;</comment>
                            <comment id="53250" author="hongchao.zhang" created="Mon, 4 Mar 2013 04:45:15 +0000"  >&lt;p&gt;status update: &lt;br/&gt;
the issue has been reproduced locally and the patch is under creation &amp;amp; test.&lt;/p&gt;</comment>
                            <comment id="53277" author="tappro" created="Mon, 4 Mar 2013 12:58:49 +0000"  >&lt;p&gt;Hongchao, could you describe the reason for this issue? We saw a similar one in Orion but it went away at some point. I wonder what the root cause could be now.&lt;/p&gt;</comment>
                            <comment id="53412" author="hongchao.zhang" created="Wed, 6 Mar 2013 02:02:59 +0000"  >&lt;p&gt;the patch is &lt;a href=&quot;http://review.whamcloud.com/#change,5604&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,5604&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;this issue is caused by the way LLOG records are written: one record is written into the LLOG by at most 3 consecutive write operations,&lt;br/&gt;
so if the second or third write fails, the data written previously will corrupt the LLOG, because it does not know the exact size of&lt;br/&gt;
the LLOG file and will treat all data it can read as LLOG records.&lt;/p&gt;</comment>
                            <comment id="53672" author="tappro" created="Mon, 11 Mar 2013 02:10:13 +0000"  >&lt;p&gt;yes, that is technical debt, those 3-step blob write can be partial.&lt;/p&gt;</comment>
                            <comment id="53911" author="hongchao.zhang" created="Wed, 13 Mar 2013 10:17:56 +0000"  >&lt;p&gt;the single write can also cause this problem,&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; osd_ldiskfs_write_record(struct inode *inode, void *buf, &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; bufsize,
                             &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; write_NUL, loff_t *offs, handle_t *handle)
{
        ...
        &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; (bufsize &amp;gt; 0) {
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (bh != NULL)
                        brelse(bh);

                block = offset &amp;gt;&amp;gt; inode-&amp;gt;i_blkbits;
                boffs = offset &amp;amp; (blocksize - 1);
                size = min(blocksize - boffs, bufsize);
                bh = ldiskfs_bread(handle, inode, block, 1, &amp;amp;err);      &amp;lt;---------- could fail &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; -ENOSPC
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!bh) {
                        CERROR(&lt;span class=&quot;code-quote&quot;&gt;&quot;%s: error reading offset %llu (block %lu): &quot;&lt;/span&gt;
                               &lt;span class=&quot;code-quote&quot;&gt;&quot;rc = %d\n&quot;&lt;/span&gt;,
                               inode-&amp;gt;i_sb-&amp;gt;s_id, offset, block, err);
                        &lt;span class=&quot;code-keyword&quot;&gt;break&lt;/span&gt;;
                }

                ...
        }

        ...
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;currently, osd_punch will cause LBUG &quot;LASSERT(op &amp;lt; OSD_OT_MAX)&quot; in &quot;osd_trans_exec_op&quot;&lt;/p&gt;</comment>
                            <comment id="53951" author="tappro" created="Wed, 13 Mar 2013 17:38:42 +0000"  >&lt;p&gt;it can fail, yes, but it doesn&apos;t change i_size, so there is no need to revert it. As for OSD_OT_MAX - it just needs to be increased if such a situation is possible and valid&lt;/p&gt;</comment>
                            <comment id="53997" author="hongchao.zhang" created="Thu, 14 Mar 2013 06:17:01 +0000"  >&lt;p&gt;no, it changes the size even in the failed case (-ENOSPC)&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-comment&quot;&gt;/* correct in-core and on-disk sizes */&lt;/span&gt;
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (new_size &amp;gt; i_size_read(inode)) {
                spin_lock(&amp;amp;inode-&amp;gt;i_lock);
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (new_size &amp;gt; i_size_read(inode))
                        i_size_write(inode, new_size);
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (i_size_read(inode) &amp;gt; LDISKFS_I(inode)-&amp;gt;i_disksize) {
                        LDISKFS_I(inode)-&amp;gt;i_disksize = i_size_read(inode);
                        dirty_inode = 1;
                }
                spin_unlock(&amp;amp;inode-&amp;gt;i_lock);
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (dirty_inode)
                        inode-&amp;gt;i_sb-&amp;gt;s_op-&amp;gt;dirty_inode(inode);
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;should we fix this here by not changing the size in the error case (could that affect other cases which need this behavior), or fix it in the LLOG layer?&lt;/p&gt;</comment>
                            <comment id="54011" author="tappro" created="Thu, 14 Mar 2013 09:55:12 +0000"  >&lt;p&gt;check the new_size value: it is not changed in the case of a failed ldiskfs_bread(), so we will not enter that &apos;if&apos; clause. It would simply be an OSD bug if the size were changed upon a failed write.&lt;/p&gt;</comment>
                            <comment id="55400" author="pjones" created="Wed, 3 Apr 2013 16:50:28 +0000"  >&lt;p&gt;Landed for 2.4&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvjjj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6882</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>