<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:56:00 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12827] sanity: test 900 read-only filesystem during cleanup: ldiskfs_xattr_block_get() bad block </title>
                <link>https://jira.whamcloud.com/browse/LU-12827</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Andreas Dilger  &amp;lt;adilger@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/2161d6cc-de2e-11e9-add9-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/2161d6cc-de2e-11e9-add9-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Seen in the test log a large number of errors during cleanup:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;rm: cannot remove &apos;/mnt/lustre/d56w.sanity/dir1/link164&apos;: Read-only file system
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Seen on the MDS console log:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LDISKFS-fs error (device dm-3): ldiskfs_xattr_block_get:405: inode #1267: comm mdt00_002: bad block 152837
Aborting journal on device dm-3-8.
LustreError: 993:0:(osd_handler.c:1708:osd_trans_commit_cb()) transaction @0xffff8ab195065900 commit error: 2
LDISKFS-fs (dm-3): Remounting filesystem read-only
LustreError: 1012:0:(mdt_handler.c:1224:mdt_getattr_internal()) lustre-MDT0000: getattr error for [0x2000013a1:0xd880:0x0]: rc = -5
LustreError: 1042:0:(llog_cat.c:762:llog_cat_cancel_records()) lustre-OST0005-osc-MDT0000: fail to cancel 1 of 1 llog-records: rc = -30
LustreError: 1042:0:(osp_sync.c:1065:osp_sync_process_committed()) lustre-OST0005-osc-MDT0000: can&apos;t cancel record: -30
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="57081">LU-12827</key>
            <summary>sanity: test 900 read-only filesystem during cleanup: ldiskfs_xattr_block_get() bad block </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="4" iconUrl="https://jira.whamcloud.com/images/icons/statuses/reopened.png" description="This issue was once resolved, but the resolution was deemed incorrect. From here issues are either marked assigned or resolved.">Reopened</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="dongyang">Dongyang Li</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Tue, 1 Oct 2019 11:41:51 +0000</created>
                <updated>Fri, 18 Oct 2019 05:37:15 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="255806" author="adilger" created="Thu, 3 Oct 2019 02:30:40 +0000"  >&lt;p&gt;This appears to have been related to the patch being tested.&lt;/p&gt;</comment>
                            <comment id="256046" author="gerrit" created="Tue, 8 Oct 2019 10:06:46 +0000"  >&lt;p&gt;Li Dongyang (dongyangli@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/36402&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36402&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12827&quot; title=&quot;sanity: test 900 read-only filesystem during cleanup: ldiskfs_xattr_block_get() bad block &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12827&quot;&gt;LU-12827&lt;/a&gt; ldiskfs: debug patch&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e07f42eb8e830b99a530da23630ed2bce6c288f7&lt;/p&gt;</comment>
                            <comment id="256099" author="dongyang" created="Tue, 8 Oct 2019 23:39:19 +0000"  >&lt;p&gt;The debug patch triggered another error with metadata_csum:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/2985fd8c-c1cb-4c8a-887b-bbb051056eb3&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/2985fd8c-c1cb-4c8a-887b-bbb051056eb3&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;and it crashed this time:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[ 3755.461295] Lustre: DEBUG MARKER: /usr/sbin/lctl set_param fail_loc=0x8000019a
[ 3755.793142] Lustre: DEBUG MARKER: /usr/sbin/lctl set_param fail_loc=0x8000019a
[ 3756.134861] Lustre: DEBUG MARKER: /usr/sbin/lctl set_param fail_loc=0x8000019a
[ 3756.474540] Lustre: DEBUG MARKER: /usr/sbin/lctl set_param fail_loc=0x8000019a
[ 3756.820665] Lustre: DEBUG MARKER: /usr/sbin/lctl set_param fail_loc=0x8000019a
[ 3757.166268] Lustre: DEBUG MARKER: /usr/sbin/lctl set_param fail_loc=0x8000019a
[ 3757.504059] Lustre: DEBUG MARKER: /usr/sbin/lctl set_param fail_loc=0
[ 3757.826509] Lustre: DEBUG MARKER: /usr/sbin/lctl lfsck_start -M lustre-MDT0000 -A -C -t namespace
[ 3758.000591] LDISKFS-fs error (device dm-3): ldiskfs_iget:4459: inode #1370: comm OI_scrub: checksum invalid
[ 3758.002430] Aborting journal on device dm-3-8.
[ 3758.003354] LDISKFS-fs (dm-3): Remounting filesystem read-only
[ 3758.003511] LustreError: 7527:0:(osd_handler.c:1727:osd_trans_commit_cb()) transaction @0xffff8e8c9c06c200 commit error: 2
[ 3758.015734] LustreError: 18534:0:(scrub.c:236:scrub_file_store()) lustre-MDT0000: store scrub file: rc = -30
[ 3758.175022] Lustre: DEBUG MARKER: /usr/sbin/lctl get_param -n 			mdd.lustre-MDT0000.lfsck_namespace |
[ 3758.175022] 			awk &lt;span class=&quot;code-quote&quot;&gt;&apos;/^status/ { print $2 }&apos;&lt;/span&gt;
[ 3758.210580] LDISKFS-fs error (device dm-3): ldiskfs_iget:4459: inode #1370: comm lfsck: checksum invalid
[ 3758.213788] LDISKFS-fs error (device dm-3): ldiskfs_iget:4459: inode #1370: comm lfsck_namespace: checksum invalid
[ 3758.216336] BUG: unable to handle kernel NULL pointer dereference at 000000000000001b
[ 3758.217805] IP: [&amp;lt;ffffffffb6e61d48&amp;gt;] is_bad_inode+0x8/0x20
[ 3758.218855] PGD 0 
[ 3758.219258] Oops: 0000 [#1] SMP 
[ 3758.219944] Modules linked in: osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgs(OE) mgc(OE) osd_ldiskfs(OE) lquota(OE) fid(OE) fld(OE) ksocklnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) ldiskfs(OE) libcfs(OE) dm_flakey rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod crc_t10dif crct10dif_generic ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core sunrpc dm_mod iosf_mbi crc32_pclmul ghash_clmulni_intel ppdev aesni_intel parport_pc lrw gf128mul glue_helper virtio_balloon ablk_helper parport cryptd joydev pcspkr i2c_piix4 ip_tables ext4 mbcache jbd2 ata_generic pata_acpi virtio_blk 8139too crct10dif_pclmul crct10dif_common ata_piix crc32c_intel serio_raw virtio_pci libata virtio_ring virtio 8139cp mii floppy [last unloaded: llog_test]
[ 3758.236589] CPU: 1 PID: 18535 Comm: lfsck_namespace Kdump: loaded Tainted: G           OE  ------------   3.10.0-957.27.2.el7_lustre.x86_64 #1
[ 3758.238786] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 3758.239778] task: ffff8e8c9d5be180 ti: ffff8e8c9ac64000 task.ti: ffff8e8c9ac64000
[ 3758.241087] RIP: 0010:[&amp;lt;ffffffffb6e61d48&amp;gt;]  [&amp;lt;ffffffffb6e61d48&amp;gt;] is_bad_inode+0x8/0x20
[ 3758.242505] RSP: 0018:ffff8e8c9ac679e8  EFLAGS: 00010246
[ 3758.243450] RAX: 0000000000000000 RBX: fffffffffffffffb RCX: 0000000000000000
[ 3758.244686] RDX: 0000000000000002 RSI: ffff8e8ca04cdec0 RDI: fffffffffffffffb
[ 3758.245919] RBP: ffff8e8c9ac67a20 R08: ffff8e8c9accb898 R09: ffff8e8c9d5be180
[ 3758.247150] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8e8ca066d1a8
[ 3758.248392] R13: 000000000000055a R14: ffff8e8c9ac67a48 R15: ffff8e8ca066d1a8
[ 3758.249626] FS:  0000000000000000(0000) GS:ffff8e8cbfd00000(0000) knlGS:0000000000000000
[ 3758.251037] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3758.252024] CR2: 000000000000001b CR3: 00000000780a8000 CR4: 00000000000606e0
[ 3758.253294] Call Trace:
[ 3758.253823]  [&amp;lt;ffffffffc09a8ec3&amp;gt;] ? ldiskfs_xattr_inode_iget+0x143/0x180 [ldiskfs]
[ 3758.255145]  [&amp;lt;ffffffffc09a8f35&amp;gt;] ldiskfs_xattr_inode_get+0x35/0x130 [ldiskfs]
[ 3758.256424]  [&amp;lt;ffffffffc09a92a0&amp;gt;] ldiskfs_xattr_ibody_get+0x160/0x1a0 [ldiskfs]
[ 3758.257699]  [&amp;lt;ffffffffc09a934d&amp;gt;] ldiskfs_xattr_get+0x6d/0x360 [ldiskfs]
[ 3758.258896]  [&amp;lt;ffffffffb6ccfeb4&amp;gt;] ? __wake_up+0x44/0x50
[ 3758.259854]  [&amp;lt;ffffffffc09e805a&amp;gt;] ldiskfs_xattr_trusted_get+0x2a/0x30 [ldiskfs]
[ 3758.261127]  [&amp;lt;ffffffffb6e6a522&amp;gt;] generic_getxattr+0x52/0x70
[ 3758.262199]  [&amp;lt;ffffffffc1075bae&amp;gt;] osd_xattr_get+0x17e/0x7e0 [osd_ldiskfs]
[ 3758.263410]  [&amp;lt;ffffffffc097be00&amp;gt;] ? fld_server_lookup+0x20/0x320 [fld]
[ 3758.264562]  [&amp;lt;ffffffffb6cddd9e&amp;gt;] ? account_entity_dequeue+0xae/0xd0
[ 3758.265695]  [&amp;lt;ffffffffc116aa8f&amp;gt;] __lfsck_links_read+0x7f/0x2d0 [lfsck]
[ 3758.266859]  [&amp;lt;ffffffffc117707b&amp;gt;] lfsck_namespace_assistant_handler_p1+0x3fb/0x2090 [lfsck]
[ 3758.268348]  [&amp;lt;ffffffffb6c2a59e&amp;gt;] ? __switch_to+0xce/0x580
[ 3758.269326]  [&amp;lt;ffffffffb6cd1a00&amp;gt;] ? rq_attach_root+0x90/0x110
[ 3758.270322]  [&amp;lt;ffffffffc115a0bf&amp;gt;] lfsck_assistant_engine+0x3cf/0x20c0 [lfsck]
[ 3758.271576]  [&amp;lt;ffffffffb6cd7c40&amp;gt;] ? wake_up_state+0x20/0x20
[ 3758.272572]  [&amp;lt;ffffffffc1159cf0&amp;gt;] ? lfsck_master_engine+0x1370/0x1370 [lfsck]
[ 3758.273818]  [&amp;lt;ffffffffb6cc2e81&amp;gt;] kthread+0xd1/0xe0
[ 3758.274695]  [&amp;lt;ffffffffb6cc2db0&amp;gt;] ? insert_kthread_work+0x40/0x40
[ 3758.275787]  [&amp;lt;ffffffffb7377c37&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[ 3758.276929]  [&amp;lt;ffffffffb6cc2db0&amp;gt;] ? insert_kthread_work+0x40/0x40
[ 3758.277994] Code: 00 00 00 66 66 66 66 90 55 b8 fb ff ff ff 48 89 e5 5d c3 66 66 66 66 90 55 b8 fb ff ff ff 48 89 e5 5d c3 66 66 66 66 90 55 31 c0 &amp;lt;48&amp;gt; 81 7f 20 40 e4 42 b7 48 89 e5 5d 0f 94 c0 c3 0f 1f 84 00 00 
[ 3758.283591] RIP  [&amp;lt;ffffffffb6e61d48&amp;gt;] is_bad_inode+0x8/0x20
[ 3758.284617]  RSP &amp;lt;ffff8e8c9ac679e8&amp;gt;
[ 3758.285244] CR2: 000000000000001b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Looks like this time it&apos;s an xattr inode failed checksum verify, and it crashed as a result:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
struct inode *ext4_xattr_inode_iget(struct inode *parent,
                                    unsigned &lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt; ea_ino, &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; *err)
{
        struct inode *ea_inode = NULL;        ea_inode = ext4_iget(parent-&amp;gt;i_sb, ea_ino);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (IS_ERR(ea_inode) || is_bad_inode(ea_inode)) {
                &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; rc = IS_ERR(ea_inode) ? PTR_ERR(ea_inode) : 0;
                ext4_error(parent-&amp;gt;i_sb, &lt;span class=&quot;code-quote&quot;&gt;&quot;error &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; reading EA inode %lu &quot;&lt;/span&gt;
                           &lt;span class=&quot;code-quote&quot;&gt;&quot;/ %d %d&quot;&lt;/span&gt;, ea_ino, rc, is_bad_inode(ea_inode));
                *err = rc != 0 ? rc : -EIO;
                &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; NULL;
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;the ea_inode is an error value, the second call to is_bad_inode will crash.&lt;/p&gt;

&lt;p&gt;Looks like we need upstream commit&#160;bab79b04999ccbbf59f1693d0783cd6ae27e4278 for rhel7 kernels to stop it crashing after the checksum verify failure from ext4_iget...&lt;/p&gt;</comment>
                            <comment id="256103" author="adilger" created="Wed, 9 Oct 2019 04:37:06 +0000"  >&lt;p&gt;Dongyang, could you please add a backported patch for the problem you found.&lt;/p&gt;</comment>
                            <comment id="256627" author="gerrit" created="Fri, 18 Oct 2019 05:37:15 +0000"  >&lt;p&gt;Li Dongyang (dongyangli@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/36484&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36484&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12827&quot; title=&quot;sanity: test 900 read-only filesystem during cleanup: ldiskfs_xattr_block_get() bad block &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12827&quot;&gt;LU-12827&lt;/a&gt; ldiskfs: fix ldiskfs_xattr_inode_iget error checking&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 11667154eee1ea60c2179fa5ba152a23825cc2a1&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="54772">LU-11922</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00nl3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>