<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:16:38 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-15238] lfsck crashes MDT LDISKFS-fs error (device md65): ldiskfs_xattr_inode_iget:407: comm lfsck: EA inode 2047917093 does not have LDISKFS_EA_INODE_FL flag</title>
                <link>https://jira.whamcloud.com/browse/LU-15238</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[458781.070693] LDISKFS-fs error (device md65): ldiskfs_xattr_inode_iget:407: comm lfsck: EA inode 2047917093 does not have LDISKFS_EA_INODE_FL flag
[458781.136989] Aborting journal on device md65-8.
[458781.142323] LDISKFS-fs error (device md65) in ldiskfs_evict_inode:251: Journal has aborted
[458781.153243] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.155099] LustreError: 98016:0:(osd_handler.c:1783:osd_trans_commit_cb()) transaction @0x000000002c9fd616 commit error: 2
[458781.158848] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.170295] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.170297] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.175978] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.182078] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.199967] Kernel panic - not syncing: LDISKFS-fs (device md65): panic forced after error

[458781.199972] LDISKFS-fs (md65): Remounting filesystem read-only
[458781.199979] LDISKFS-fs (md65): Remounting filesystem read-only
[458781.200005] LDISKFS-fs (md65): Remounting filesystem read-only
[458781.200549] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.200552] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.200840] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.201096] LDISKFS-fs (md65): Remounting filesystem read-only
[458781.260424] LDISKFS-fs error (device md65): ldiskfs_journal_check_start:61: Detected aborted journal
[458781.262419] CPU: 4 PID: 2861532 Comm: lfsck Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-305.10.2.x6.0.24.x86_64 #1
[458781.262421] Hardware name: Seagate Laguna Seca/Laguna Seca, BIOS v02.0040 06/29/2018
[458781.333307] Call Trace:
[458781.336774]  dump_stack+0x5c/0x80
[458781.341219]  panic+0xe7/0x2a9
[458781.345208]  ? wake_up_q+0x54/0x80
[458781.349955]  ldiskfs_handle_error.cold.139+0x13/0x13 [ldiskfs]
[458781.356863]  __ldiskfs_error+0x8b/0x100 [ldiskfs]
[458781.362710]  ? ldiskfs_htree_fill_tree+0xa0/0x2d0 [ldiskfs]
[458781.369344]  ldiskfs_xattr_inode_iget+0xf4/0x170 [ldiskfs]
[458781.375883]  ldiskfs_xattr_inode_get+0x4c/0x1e0 [ldiskfs]
[458781.382279]  ? xattr_find_entry+0x95/0x110 [ldiskfs]
[458781.388253]  ldiskfs_xattr_ibody_get+0x15f/0x180 [ldiskfs]
[458781.394742]  ldiskfs_xattr_get+0x85/0x2d0 [ldiskfs]
[458781.400634]  __vfs_getxattr+0x53/0x70
[458781.405326]  osd_xattr_get+0x167/0x650 [osd_ldiskfs]
[458781.411326]  lfsck_layout_get_lovea.part.77+0x6c/0x260 [lfsck]
[458781.418171]  lfsck_layout_master_exec_oit+0x1b5/0xc90 [lfsck]
[458781.424910]  lfsck_master_oit_engine+0xc52/0x1360 [lfsck]
[458781.432113]  lfsck_master_engine+0x50e/0xcd0 [lfsck]
[458781.438056]  ? finish_wait+0x80/0x80
[458781.442568]  ? lfsck_master_oit_engine+0x1360/0x1360 [lfsck]
[458781.449177]  kthread+0x116/0x130
[458781.453342]  ? kthread_flush_work_fn+0x10/0x10
[458781.458686]  ret_from_fork+0x1f/0x40
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And many backtraces:&lt;/p&gt;


&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[456491.541627]  ret_from_fork+0x1f/0x40
[456491.547490] CPU: 1 PID: 2861532 Comm: lfsck Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-305.10.2.x6.0.24.x86_64 #1
[456491.561264] Hardware name: Seagate Laguna Seca/Laguna Seca, BIOS v02.0040 06/29/2018
[456491.569958] Call Trace:
[456491.573363]  dump_stack+0x5c/0x80
[456491.577599]  lfsck_trans_create.part.58+0x63/0x70 [lfsck]
[456491.583966]  lfsck_namespace_trace_update+0xa3b/0xa50 [lfsck]
[456491.590650]  lfsck_namespace_exec_oit+0x4b3/0x990 [lfsck]
[456491.597048]  ? down_write+0xe/0x40
[456491.601438]  lfsck_master_oit_engine+0xc52/0x1360 [lfsck]
[456491.607787]  lfsck_master_engine+0x50e/0xcd0 [lfsck]
[456491.613699]  ? finish_wait+0x80/0x80
[456491.618187]  ? lfsck_master_oit_engine+0x1360/0x1360 [lfsck]
[456491.624716]  kthread+0x116/0x130
[456491.628964]  ? kthread_flush_work_fn+0x10/0x10
[456491.634325]  ret_from_fork+0x1f/0x40
[456494.228001] CPU: 18 PID: 2861532 Comm: lfsck Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-305.10.2.x6.0.24.x86_64 #1
[456494.241276] Hardware name: Seagate Laguna Seca/Laguna Seca, BIOS v02.0040 06/29/2018
[456494.249695] Call Trace:
[456494.252853]  dump_stack+0x5c/0x80
[456494.256885]  lfsck_trans_create.part.58+0x63/0x70 [lfsck]
[456494.262955]  lfsck_namespace_trace_update+0xa3b/0xa50 [lfsck]
[456494.269296]  lfsck_namespace_exec_oit+0x4b3/0x990 [lfsck]
[456494.275275]  ? down_write+0xe/0x40
[456494.279264]  lfsck_master_oit_engine+0xc52/0x1360 [lfsck]
[456494.285258]  lfsck_master_engine+0x50e/0xcd0 [lfsck]
[456494.290924]  ? finish_wait+0x80/0x80
[456494.295116]  ? lfsck_master_oit_engine+0x1360/0x1360 [lfsck]
[456494.301388]  kthread+0x116/0x130
[456494.305199]  ? kthread_flush_work_fn+0x10/0x10
[456494.310227]  ret_from_fork+0x1f/0x40
[456494.314569] CPU: 8 PID: 2861532 Comm: lfsck Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-305.10.2.x6.0.24.x86_64 #1
[456494.338328] Hardware name: Seagate Laguna Seca/Laguna Seca, BIOS v02.0040 06/29/2018
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; bt -l
PID: 2861532 TASK: ffff9c083c05af80 CPU: 4 COMMAND: &quot;lfsck&quot;
#0 [ffffbd866a4cf8f0] machine_kexec at ffffffff9dc6156e
/usr/src/debug/kernel-4.18.0-305.10.2.x6.0.24/linux-4.18.0-305.10.2.x6.0.24.x86_64/arch/x86/kernel/machine_kexec_64.c: 389
#1 [ffffbd866a4cf948] __crash_kexec at ffffffff9dd8f94d
/usr/src/debug/kernel-4.18.0-305.10.2.x6.0.24/linux-4.18.0-305.10.2.x6.0.24.x86_64/kernel/kexec_core.c: 957
#2 [ffffbd866a4cfa10] panic at ffffffff9dce0dc7
/usr/src/debug/kernel-4.18.0-305.10.2.x6.0.24/linux-4.18.0-305.10.2.x6.0.24.x86_64/./arch/x86/include/asm/smp.h: 72
#3 [ffffbd866a4cfaa0] __ldiskfs_error at ffffffffc1a9252b [ldiskfs]
/home/centos/rpmbuild/BUILD/lustre-2.14.55_81_gc26b347/ldiskfs/inode.c: 4523
#4 [ffffbd866a4cfb48] ldiskfs_xattr_inode_iget at ffffffffc1a5cf14 [ldiskfs]
/home/centos/rpmbuild/BUILD/lustre-2.14.55_81_gc26b347/ldiskfs/trace/events/ldiskfs.h: 2666
#5 [ffffbd866a4cfb80] ldiskfs_xattr_inode_get at ffffffffc1a5fd9c [ldiskfs]
/home/centos/rpmbuild/BUILD/lustre-2.14.55_81_gc26b347/ldiskfs/trace/events/ldiskfs.h: 1775
#6 [ffffbd866a4cfbe0] ldiskfs_xattr_ibody_get at ffffffffc1a601ef [ldiskfs]
/home/centos/rpmbuild/BUILD/lustre-2.14.55_81_gc26b347/ldiskfs/ldiskfs.h: 1572
#7 [ffffbd866a4cfc48] ldiskfs_xattr_get at ffffffffc1a60295 [ldiskfs]
/usr/src/kernels/4.18.0-305.10.2.x6.0.24.x86_64/include/linux/quotaops.h: 19
#8 [ffffbd866a4cfca0] __vfs_getxattr at ffffffff9df43223
/usr/src/debug/kernel-4.18.0-305.10.2.x6.0.24/linux-4.18.0-305.10.2.x6.0.24.x86_64/fs/xattr.c: 374
#9 [ffffbd866a4cfcd0] osd_xattr_get at ffffffffc1b28c07 [osd_ldiskfs]
/home/centos/rpmbuild/BUILD/lustre-2.14.55_81_gc26b347/lustre/include/lustre_compat.h: 540
#10 [ffffbd866a4cfd18] lfsck_layout_get_lovea at ffffffffc158bd5c [lfsck]
/home/centos/rpmbuild/BUILD/lustre-2.14.55_81_gc26b347/lustre/include/dt_object.h: 2875
#11 [ffffbd866a4cfd50] lfsck_layout_master_exec_oit at ffffffffc1597025 [lfsck]
/home/centos/rpmbuild/BUILD/lustre-2.14.55_81_gc26b347/lustre/lfsck/lfsck_layout.c: 5711
#12 [ffffbd866a4cfe08] lfsck_master_oit_engine at ffffffffc1560de2 [lfsck]
/home/centos/rpmbuild/BUILD/lustre-2.14.55_81_gc26b347/lustre/lfsck/lfsck_engine.c: 531
#13 [ffffbd866a4cfe78] lfsck_master_engine at ffffffffc15619fe [lfsck]
/home/centos/rpmbuild/BUILD/lustre-2.14.55_81_gc26b347/lustre/lfsck/lfsck_engine.c: 1083
#14 [ffffbd866a4cff10] kthread at ffffffff9dd043a6
/usr/src/debug/kernel-4.18.0-305.10.2.x6.0.24/linux-4.18.0-305.10.2.x6.0.24.x86_64/kernel/kthread.c: 319
#15 [ffffbd866a4cff50] ret_from_fork at ffffffff9e60023f
/usr/src/debug/kernel-4.18.0-305.10.2.x6.0.24/linux-4.18.0-305.10.2.x6.0.24.x86_64/arch/x86/entry/entry_64.S: 319
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;With (READ ONLY) lfsck enabled this crash persisted after rebooting, running e2fsck and raid re-sync.&lt;/p&gt;

&lt;p&gt;lfsck was eventually cleared by running lctl lfsck_stop on the MDT nodes as early as possible in the mount (and/or failback) until no more lfsck activity was observed.&lt;/p&gt;</description>
                <environment>Server: RHEL8</environment>
        <key id="67195">LU-15238</key>
            <summary>lfsck crashes MDT LDISKFS-fs error (device md65): ldiskfs_xattr_inode_iget:407: comm lfsck: EA inode 2047917093 does not have LDISKFS_EA_INODE_FL flag</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="stancheff">Shaun Tancheff</reporter>
                        <labels>
                    </labels>
                <created>Tue, 16 Nov 2021 13:25:50 +0000</created>
                <updated>Thu, 20 Jan 2022 20:39:44 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="318334" author="spitzcor" created="Tue, 16 Nov 2021 14:58:29 +0000"  >&lt;p&gt;I&apos;ve bumped this to blocker because once you fall into the trap you can&apos;t (easily) get out of it.&lt;/p&gt;</comment>
                            <comment id="319283" author="adilger" created="Sat, 27 Nov 2021 00:08:12 +0000"  >&lt;p&gt;My first guess here would be that there is some mismatch in how the RHEL8 kernel is implementing the &quot;&lt;tt&gt;ea_inode&lt;/tt&gt;&quot; feature when it was ported to the upstream kernel vs. how it was patched into ldiskfs previously.  That said, the on-disk xattr storage should be internal to ldiskfs, and osd-ldiskfs shouldn&apos;t even see that (let alone higher levels), so there does seem to be something wrong in ldiskfs/ext4 itself to get an inconsistency like this.&lt;/p&gt;</comment>
                            <comment id="319284" author="pjones" created="Sat, 27 Nov 2021 00:17:17 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=spitzcor&quot; class=&quot;user-hover&quot; rel=&quot;spitzcor&quot;&gt;spitzcor&lt;/a&gt; iwho on your team working on this issue? &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=artem_blagodarenko&quot; class=&quot;user-hover&quot; rel=&quot;artem_blagodarenko&quot;&gt;artem_blagodarenko&lt;/a&gt; perhaps?&lt;/p&gt;</comment>
                            <comment id="319307" author="artem_blagodarenko" created="Mon, 29 Nov 2021 06:50:40 +0000"  >&lt;p&gt;&amp;gt;Cory Spitz iwho on your team working on this issue? Artem Blagodarenko perhaps?&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=pjones&quot; class=&quot;user-hover&quot; rel=&quot;pjones&quot;&gt;pjones&lt;/a&gt;, yes, I do.&lt;/p&gt;</comment>
                            <comment id="321923" author="spitzcor" created="Thu, 6 Jan 2022 20:01:29 +0000"  >&lt;p&gt;In case it wasn&apos;t clear that this bug is about lfsck crashing, not the on-disk defect.  &lt;b&gt;I think&lt;/b&gt; &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15265&quot; title=&quot;e2fsck test reports &amp;quot;Free blocks count wrong for group #705232 (33791, counted=32768)&amp;quot;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15265&quot;&gt;&lt;del&gt;LU-15265&lt;/del&gt;&lt;/a&gt; may be tracking that.  It needs to be confirmed yet.&lt;/p&gt;</comment>
                            <comment id="323359" author="adilger" created="Thu, 20 Jan 2022 19:31:16 +0000"  >&lt;p&gt;Cory mentioned that this may be fallout from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15404&quot; title=&quot;kernel panic and filesystem corruption in setxattr due to journal transaction restart&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15404&quot;&gt;&lt;del&gt;LU-15404&lt;/del&gt;&lt;/a&gt;, when the large xattr has failed to be unlinked because of transaction credits, so it may be that this problem goes away when that issue is fixed (i.e. it may not leave a large xattr inode in the filesystem without  &lt;tt&gt;LDISKFS_EA_INODE_FL&lt;/tt&gt; set).&lt;/p&gt;

&lt;p&gt;It probably makes sense to change this case from &lt;tt&gt;ext4_error()&lt;/tt&gt; to &lt;tt&gt;ext4_warning_inode()&lt;/tt&gt; or similar, and return &lt;tt&gt;-EIO&lt;/tt&gt; when accessing that large xattr so that it doesn&apos;t cause the filesystem to be remounted read-only?  That would be a lot more robust, and would only affect the one inode&apos;s xattr.&lt;/p&gt;</comment>
                            <comment id="323371" author="spitzcor" created="Thu, 20 Jan 2022 20:33:13 +0000"  >&lt;p&gt;This bug is fallout from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15404&quot; title=&quot;kernel panic and filesystem corruption in setxattr due to journal transaction restart&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15404&quot;&gt;&lt;del&gt;LU-15404&lt;/del&gt;&lt;/a&gt;.  &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=zam&quot; class=&quot;user-hover&quot; rel=&quot;zam&quot;&gt;zam&lt;/a&gt; wrote in an internal HPE ticket:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;the&amp;#93;&lt;/span&gt; bug may cause EA pointer to point to old EA inode (freed or reused within committed transaction) with the symptoms from this ticket.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;That said, it would be ideal if lfsck would handle the situation gracefully, instead of crashing.  Let&apos;s downgrade this issue knowing that it won&apos;t happen (in this way) if the corruption from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15404&quot; title=&quot;kernel panic and filesystem corruption in setxattr due to journal transaction restart&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15404&quot;&gt;&lt;del&gt;LU-15404&lt;/del&gt;&lt;/a&gt; is addressed.  Then, the scope of this ticket will focus on making lfsck gracefully handle the condition instead (for example, as with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14105&quot; title=&quot;lfsck shouldn&amp;#39;t LBUG() on disk data&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14105&quot;&gt;&lt;del&gt;LU-14105&lt;/del&gt;&lt;/a&gt;).&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="67293">LU-15265</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="67776">LU-15404</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i02a1z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>