<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:28:30 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16610] ldiskfs_find_dest_de bad entry in directory when running io500 test</title>
                <link>https://jira.whamcloud.com/browse/LU-16610</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Directory corruption when running io500 test on openEuler 22.03:&lt;/p&gt;

&lt;p&gt;Client side log&#160;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[openeuler@oe2203-test io500]$ sudo /io500.sh config-minimal.ini 
IO500 version io500-sc22_v2 (standard)
[RESULT] &#160; &#160; &#160; ior-easy-write &#160; &#160; &#160; &#160;0.105593 GiB/s : time 338.211 seconds
ERROR: open64(&lt;span class=&quot;code-quote&quot;&gt;&quot;/mnt/lustre/datafiles/2023.02.14-10.12.17/mdtest-easy/test-dir.0-0/mdtest_tree.0.0/file.mdtest.1.85&quot;&lt;/span&gt;, 66, 0664) failed. Error: Read-only file system, (aiori-POSIX.c:569)
--------------------------------------------------------------------------
MPI_ABORT was invoked on rank 1 in communicator MPI_COMM_WORLD
with errorcode -1.&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&#160;&lt;br/&gt;
Server side log&#160;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[ 9962.007724] LDISKFS-fs error (device dm-0): ldiskfs_find_dest_de:2412: inode #5767170: block 3771253: comm mdt00_000: bad entry in directory: rec_len is smaller than minimal - offset=0, inode=0, rec_len=8, name_len=0, size=4096
[ 9962.051171] Aborting journal on device dm-0-8.
[ 9962.058456] LDISKFS-fs (dm-0): Remounting filesystem read-only
[ 9962.059877] LDISKFS-fs error (device dm-0) in iam_txn_add:547: Journal has aborted
[ 9962.064365] LustreError: 11366:0:(osd_io.c:2222:osd_ldiskfs_write_record()) journal_get_write_access() returned error -30
[ 9962.066805] LustreError: 11366:0:(llog_cat.c:592:llog_cat_add_rec()) llog_write_rec -30: lh=00000000c04e4ff3
[ 9962.069137] LustreError: 11366:0:(tgt_lastrcvd.c:1326:tgt_add_reply_data()) lustre-MDT0000: can&apos;t update reply_data file: rc = -30
[ 9962.071742] LustreError: 11366:0:(osd_handler.c:2089:osd_trans_stop()) lustre-MDT0000: failed in transaction hook: rc = -30
[ 9962.074184] LustreError: 11366:0:(osd_handler.c:2099:osd_trans_stop()) lustre-MDT0000: failed to stop transaction: rc = -30
[ 9962.074274] LustreError: 11348:0:(osd_handler.c:1789:osd_trans_commit_cb()) transaction @0x00000000c73ec34c commit error: 2&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;</description>
                <environment>openEuler 22.03 kernel: 5.10.0-60.79.0.103.oe2203.aarch64</environment>
        <key id="74907">LU-16610</key>
            <summary>ldiskfs_find_dest_de bad entry in directory when running io500 test</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="xinliang">Xinliang Liu</assignee>
                                    <reporter username="xinliang">Xinliang Liu</reporter>
                        <labels>
                    </labels>
                <created>Thu, 2 Mar 2023 02:19:14 +0000</created>
                <updated>Wed, 22 Mar 2023 01:57:18 +0000</updated>
                            <resolved>Wed, 22 Mar 2023 01:57:18 +0000</resolved>
                                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="364612" author="xinliang" created="Thu, 2 Mar 2023 02:23:15 +0000"  >&lt;p&gt;A similar issue also happens when running the io500 test, but the kernel version is different.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12268&quot; title=&quot;LDISKFS-fs error: ldiskfs_find_dest_de:2066: bad entry in directory: rec_len is smaller than minimal - offset=0( 0), inode=201, rec_len=0, name_len=0&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12268&quot;&gt;&lt;del&gt;LU-12268&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="364613" author="xinliang" created="Thu, 2 Mar 2023 02:28:47 +0000"  >&lt;p&gt;Bisected the related commit/patch:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
f94c02917f1d ext4: avoid cycles in directory h-tree (which is included in the openEuler 22.03 LTS kernel kernel-5.10.0-60.58.0.86.oe2203)
ldiskfs/kernel_patches/patches/oe2203/ext4-pdirop.patch (Lustre ldiskfs patch on ext4) &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Workaround:&lt;/p&gt;

&lt;p&gt;Revert commit &#8220;f94c02917f1d ext4: avoid cycles in directory h-tree&#8221; and update ext4-pdirop.patch.&lt;/p&gt;

&lt;p&gt;See the io500 test suite result below; it runs ok with this workaround.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[openeuler@oe2203-test io500]$ sudo ./io500 config-minimal.ini
IO500 version io500-sc22_v2 (standard)
[RESULT] &#160; &#160; &#160; ior-easy-write &#160; &#160; &#160; &#160;0.103132 GiB/s : time 316.294 seconds
[RESULT] &#160; &#160;mdtest-easy-write &#160; &#160; &#160; &#160;0.067036 kIOPS : time 301.645 seconds
[ &#160; &#160; &#160;] &#160; &#160; &#160; &#160; &#160; &#160;timestamp &#160; &#160; &#160; &#160;0.000000 kIOPS : time 0.000 seconds
[RESULT] &#160; &#160; &#160; ior-hard-write &#160; &#160; &#160; &#160;0.101985 GiB/s : time 312.619 seconds
[RESULT] &#160; &#160;mdtest-hard-write &#160; &#160; &#160; &#160;0.054293 kIOPS : time 301.826 seconds
[RESULT] &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; find &#160; &#160; &#160; &#160;3.992785 kIOPS : time 9.124 seconds
[RESULT] &#160; &#160; &#160; &#160;ior-easy-read &#160; &#160; &#160; &#160;0.023636 GiB/s : time 1380.092 seconds
[RESULT] &#160; &#160; mdtest-easy-stat &#160; &#160; &#160; &#160;0.107839 kIOPS : time 187.558 seconds
[RESULT] &#160; &#160; &#160; &#160;ior-hard-read &#160; &#160; &#160; &#160;0.022159 GiB/s : time 1438.550 seconds
[RESULT] &#160; &#160; mdtest-hard-stat &#160; &#160; &#160; &#160;0.203911 kIOPS : time 81.015 seconds
[RESULT] &#160; mdtest-easy-delete &#160; &#160; &#160; &#160;0.106105 kIOPS : time 190.760 seconds
[RESULT] &#160; &#160; mdtest-hard-read &#160; &#160; &#160; &#160;0.065468 kIOPS : time 250.149 seconds
[RESULT] &#160; mdtest-hard-delete &#160; &#160; &#160; &#160;0.103164 kIOPS : time 159.408 seconds
[SCORE ] Bandwidth 0.048447 GiB/s : IOPS 0.147904 kiops : TOTAL 0.084649
The result files are stored in the directory: ./results/2023.02.22-01.44.09
[openeuler@oe2203-test io500]$ uname -r
5.10.0-60.79.0.103.oe2203.aarch64 &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;The commit &#8220;f94c02917f1d ext4: avoid cycles in directory h-tree&#8221; should be ok; we may need to tune the ext4-pdirop.patch.&lt;/p&gt;</comment>
                            <comment id="364615" author="xinliang" created="Thu, 2 Mar 2023 02:34:46 +0000"  >&lt;p&gt;It seems this issue is related to the two code parts below:&lt;/p&gt;

&lt;p&gt;Part1 (introduced by&#160; commit: f94c02917f1d ext4: avoid cycles in directory h-tree)&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
block = dx_get_block(at);
&lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; (i = 0; i &amp;lt;= level; i++) {
&#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (blocks[i] == block) {
&#160; &#160; &#160; &#160; ext4_warning_inode(dir,
&#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-quote&quot;&gt;&quot;dx entry: tree cycle block %u points back to block %u&quot;&lt;/span&gt;,
&#160; &#160; &#160; &#160; &#160; &#160; blocks[level], block);
&#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;goto&lt;/span&gt; fail;
&#160; &#160; }
}&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Part2 (introduced by ext4-pdirop.patch)&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
&lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (indirect == level) { &lt;span class=&quot;code-comment&quot;&gt;/* the last index level */&lt;/span&gt;
&#160; &#160; struct ext4_dir_lock_data *ld;
&#160; &#160; u64 myblock;&#160; &#160; /* By &lt;span class=&quot;code-keyword&quot;&gt;default&lt;/span&gt; we only lock DE-block, however, we will
&#160; &#160; &#160;* also lock the last level DX-block &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;:
&#160; &#160; &#160;* a) there is hash collision
&#160; &#160; &#160;* &#160; &#160;we will set DX-lock flag (a few lines below)
&#160; &#160; &#160;* &#160; &#160;and redo to lock DX-block
&#160; &#160; &#160;* &#160; &#160;see detail in dx_probe_hash_collision()
&#160; &#160; &#160;* b) it&apos;s a retry from splitting
&#160; &#160; &#160;* &#160; &#160;we need to lock the last level DX-block so nobody
&#160; &#160; &#160;* &#160; &#160;&lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt; can split any leaf blocks under the same
&#160; &#160; &#160;* &#160; &#160;DX-block, see detail in ext4_dx_add_entry()
&#160; &#160; &#160;*/
&#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (ext4_htree_dx_locked(lck)) {
&#160; &#160; &#160; &#160; /* DX-block is locked, just lock DE-block
&#160; &#160; &#160; &#160; &#160;* and &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt;
&#160; &#160; &#160; &#160; &#160;*/
&#160; &#160; &#160; &#160; ext4_htree_spin_unlock(lck);
&#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!ext4_htree_safe_locked(lck))
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; ext4_htree_de_lock(lck, frame-&amp;gt;at);

...

&#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (myblock == EXT4_HTREE_NODE_CHANGED) {
&#160; &#160; &#160; &#160; /* someone split &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; DE-block before
&#160; &#160; &#160; &#160; &#160;* I locked it, I need to retry and lock
&#160; &#160; &#160; &#160; &#160;* valid DE-block
&#160; &#160; &#160; &#160; &#160;*/
&#160; &#160; &#160; &#160; ext4_htree_de_unlock(lck);
&#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;continue&lt;/span&gt;;
&#160; &#160; }
&#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; frame;
}&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;After putting Part2 after Part1, this issue is gone.&lt;/p&gt;</comment>
                            <comment id="364618" author="adilger" created="Thu, 2 Mar 2023 02:46:01 +0000"  >&lt;p&gt;Xinliang, great debugging. &lt;/p&gt;</comment>
                            <comment id="364619" author="xinliang" created="Thu, 2 Mar 2023 02:47:28 +0000"  >&lt;p&gt;Verified that the rhel9.1 kernel kernel-5.14.0-162.12.1.el9_1 does not have this issue.&lt;/p&gt;</comment>
                            <comment id="364754" author="adilger" created="Thu, 2 Mar 2023 19:35:59 +0000"  >&lt;p&gt;Xinliang, since this is a bug in the ldiskfs patch series for that kernel version, can you please submit a patch to update that series with the fix. &lt;/p&gt;</comment>
                            <comment id="364776" author="xinliang" created="Fri, 3 Mar 2023 01:41:37 +0000"  >&lt;p&gt;Andreas, sure. Working on it.&lt;/p&gt;</comment>
                            <comment id="364786" author="gerrit" created="Fri, 3 Mar 2023 04:38:07 +0000"  >&lt;p&gt;&quot;xinliang &amp;lt;xinliang.liu@linaro.org&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/50192&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/50192&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16610&quot; title=&quot;ldiskfs_find_dest_de bad entry in directory when running io500 test&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16610&quot;&gt;&lt;del&gt;LU-16610&lt;/del&gt;&lt;/a&gt; ldiskfs: fix directory corruption on openeuler 22.03&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 9280d8c880e534629df89b7d40e531297e701c99&lt;/p&gt;</comment>
                            <comment id="366771" author="gerrit" created="Tue, 21 Mar 2023 23:16:24 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/50192/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/50192/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16610&quot; title=&quot;ldiskfs_find_dest_de bad entry in directory when running io500 test&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16610&quot;&gt;&lt;del&gt;LU-16610&lt;/del&gt;&lt;/a&gt; ldiskfs: fix directory corruption on openeuler 22.03&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 85b76aa91a3999a325a9ef970f0cc8b6dd1cdda7&lt;/p&gt;</comment>
                            <comment id="366818" author="pjones" created="Wed, 22 Mar 2023 01:57:18 +0000"  >&lt;p&gt;Landed for 2.16&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="55581">LU-12268</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i03fhb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>