<?xml version="1.0" encoding="UTF-8"?>
<!--
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:15:56 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8252] MDS kernel panic after aborting journal</title>
                <link>https://jira.whamcloud.com/browse/LU-8252</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We&apos;re having an issue with our mds crashing. This is after recovering from a full md filesystem. We&apos;ve been deleting from storage to free up metadata space, but have run into these kernel panics.&lt;/p&gt;

&lt;p&gt;dmesg logs have the following:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&amp;lt;2&amp;gt;LDISKFS-jfs error (device md0): ldiskfs_mb_check_ondisk_bitmap: on-disk bitmap for group 0corrupted: 57 blocks free in bitmap, 6 - in gd
&amp;lt;4&amp;gt;
&amp;lt;3&amp;gt;Aborting journal on device md0-8.
&amp;lt;2&amp;gt;LDISKFS-fs error (device md0): ldiskfs_journal_start_sb: Detected aborted journal
&amp;lt;2&amp;gt;LDISKFS-fs error (device md0) in iam_txn_add: Journal has aborted
&amp;lt;2&amp;gt;LDISKFS-fs (md0): Remounting filesystem read-only
&amp;lt;2&amp;gt;LDISKFS-fs (md0): Remounting filesystem read-only
&amp;lt;3&amp;gt;LustreError: 6919:0:(osd_io.c:1173:osd_ldiskfs_write_record()) journal_get_write_access() returned error -30
&amp;lt;3&amp;gt;LustreError: 6919:0:(osd_handler.c:1054:osd_trans_stop()) Failure in transaction hook: -30
&amp;lt;3&amp;gt;LustreError: 6919:0:(osd_handler.c:1063:osd_trans_stop()) Failure to stop transaction: -30
&amp;lt;2&amp;gt;LDISKFS-fs error (device md0): ldiskfs_mb_new_blocks: Updating bitmap error: [err -30] [pa ffff8860350c8ba8] [phy 34992896] [logic 256] [len 256] [free 256] [error 1] [inode 1917]
&amp;lt;3&amp;gt;LustreError: 8967:0:(osd_io.c:1166:osd_ldiskfs_write_record()) md0: error reading offset 2093056 (block 511): rc = -30
&amp;lt;3&amp;gt;LustreError: 8967:0:(llog_osd.c:156:llog_osd_write_blob()) echo-MDT0000-osd: error writing log record: rc = -30
&amp;lt;2&amp;gt;LDISKFS-fs error (device md0) in start_transaction: Journal has aborted
&amp;lt;2&amp;gt;LDISKFS-fs error (device md0) in start_transaction: Journal has aborted
&amp;lt;3&amp;gt;LustreError: 8967:0:(llog_cat.c:356:llog_cat_add_rec()) llog_write_rec -30: lh=ffff88601d1e4b40
&amp;lt;4&amp;gt;
&amp;lt;3&amp;gt;LustreError: 5801:0:(osd_handler.c:863:osd_trans_commit_cb()) transaction @0xffff882945fc28c0 commit error: 2
&amp;lt;0&amp;gt;LustreError: 6145:0:(osp_sync.c:874:osp_sync_thread()) ASSERTION( rc == 0 || rc == LLOG_PROC_BREAK ) failed: 11 changes, 31 in progress, 0 in flight: -5
&amp;lt;0&amp;gt;LustreError: 6145:0:(osp_sync.c:874:osp_sync_thread()) LBUG
&amp;lt;4&amp;gt;Pid: 6145, comm: osp-syn-98-0
&amp;lt;4&amp;gt;
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03b3895&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03b3e97&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0eff2e3&amp;gt;] osp_sync_thread+0x753/0x7d0 [osp]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81528df6&amp;gt;] ? schedule+0x176/0x3b0
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0efeb90&amp;gt;] ? osp_sync_thread+0x0/0x7d0 [osp]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;4&amp;gt;
&amp;lt;3&amp;gt;LustreError: 6135:0:(llog.c:159:llog_cancel_rec()) echo-OST005d-osc-MDT0000: fail to write header for llog #0x5552:1#00000000: rc = -30
&amp;lt;3&amp;gt;LustreError: 6135:0:(llog_cat.c:538:llog_cat_cancel_records()) echo-OST005d-osc-MDT0000: fail to cancel 1 of 1 llog-records: rc = -30
&amp;lt;3&amp;gt;LustreError: 6135:0:(osp_sync.c:721:osp_sync_process_committed()) echo-OST005d-osc-MDT0000: can&apos;t cancel record: -30
&amp;lt;0&amp;gt;Kernel panic - not syncing: LBUG
&amp;lt;4&amp;gt;Pid: 6145, comm: osp-syn-98-0 Not tainted 2.6.32-431.23.3.el6_lustre.x86_64 #1
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8152896c&amp;gt;] ? panic+0xa7/0x16f
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03b3eeb&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0eff2e3&amp;gt;] ? osp_sync_thread+0x753/0x7d0 [osp]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81528df6&amp;gt;] ? schedule+0x176/0x3b0
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0efeb90&amp;gt;] ? osp_sync_thread+0x0/0x7d0 [osp]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109abf6&amp;gt;] ? kthread+0x96/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c20a&amp;gt;] ? child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Centos 6.5&lt;br/&gt;
Linux 2.6.32-431.23.3.el6_lustre.x86_64</environment>
        <key id="37490">LU-8252</key>
            <summary>MDS kernel panic after aborting journal</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="ys">Yang Sheng</assignee>
                                    <reporter username="cyb">Cory Brassington</reporter>
                        <labels>
                    </labels>
                <created>Wed, 8 Jun 2016 22:13:51 +0000</created>
                <updated>Wed, 1 Feb 2017 17:00:51 +0000</updated>
                            <resolved>Tue, 31 Jan 2017 20:15:43 +0000</resolved>
                                    <version>Lustre 2.5.3</version>
                                    <fixVersion>Lustre 2.9.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="155257" author="adilger" created="Thu, 9 Jun 2016 17:45:00 +0000"  >&lt;p&gt;The core of the problem is an inconsistency in the MDT filesystem that caused the filesystem to be remounted read-only (-30 = -EROFS) to prevent cascading corruption.  This will need a full e2fsck run to repair: &lt;tt&gt;e2fsck -fy /dev/&amp;lt;MDTDEV&amp;gt;&lt;/tt&gt;.  Please ensure you have the latest e2fsprogs-1.42.13.wc5 installed, which fixes a number of issues in older versions.  You may want to consider doing a dry-run &lt;tt&gt;e2fsck -fn /dev/&amp;lt;MDTDEV&amp;gt;&lt;/tt&gt; to log and review what problems are reported before starting the repair.  If you want to be especially careful, you can create a snapshot of the MDT device, or do a full device-level backup (&lt;tt&gt;dd if=/dev/&amp;lt;MDTDEV&amp;gt; of=/dev/backup bs=1M&lt;/tt&gt;) in case of problems.&lt;/p&gt;

&lt;p&gt;The LBUG is the result of the read-only filesystem not being handled robustly, which is fixed by a patch &lt;a href=&quot;http://review.whamcloud.com/19856&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19856&lt;/a&gt; &quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6696&quot; title=&quot;ASSERTION( rc == 0 || rc == LLOG_PROC_BREAK ) failed: 0 changes, 0 in progress, 0 in flight: -5&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6696&quot;&gt;&lt;del&gt;LU-6696&lt;/del&gt;&lt;/a&gt; llog: improve error handling&quot;, though it is not particularly beneficial to apply this patch to your system once the MDT problem is resolved, since by the time this assertion is hit the read-only MDT is not very useful anymore.&lt;/p&gt;</comment>
                            <comment id="155360" author="sfw" created="Fri, 10 Jun 2016 16:15:53 +0000"  >&lt;p&gt;Looks like the e2fsck failed out with:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Inode 3002126196 has a bad extended attribute block 14139.  Clear? yes

Inode 3002126196, i_blocks is 8, should be 0.  Fix? yes

Inode 3002126200 has a bad extended attribute block 14140.  Clear? yes

Inode 3002126200, i_blocks is 8, should be 0.  Fix? yes

Pass 2: Checking directory structure
Illegal inode number passed to ext2fs_test_inode_bitmap #0 for in-use inode map
Directory inode 3040644673, block #0, offset 0: directory corrupted
Salvage? yes

First entry &apos;&apos; (inode=14094) in directory inode 3040644673 (???) should be &apos;.&apos;
Fix? yes

Setting filetype for entry &apos;.&apos; in ??? (3040644673) to 2.
Missing &apos;..&apos; in directory inode 3040644673.
Fix? yes

Setting filetype for entry &apos;..&apos; in ??? (3040644673) to 2.
Directory inode 3040644678, block #0, offset 0: directory corrupted
Salvage? yes

Missing &apos;.&apos; in directory inode 3040644678.
Fix? yes

Setting filetype for entry &apos;.&apos; in ??? (3040644678) to 2.
Missing &apos;..&apos; in directory inode 3040644678.
Fix? yes

Setting filetype for entry &apos;..&apos; in ??? (3040644678) to 2.
Internal error: couldn&apos;t find dir_info for 3040644597.

echo-MDT0000: ***** FILE SYSTEM WAS MODIFIED *****
e2fsck: aborted

echo-MDT0000: ***** FILE SYSTEM WAS MODIFIED *****
emds1 /root #&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I&apos;ve subsequently mounted it successfully albeit with warnings:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Jun 10 08:44:13 emds1 kernel: LDISKFS-fs (md0): warning: mounting fs with errors, running e2fsck is recommended
Jun 10 08:44:13 emds1 kernel: LDISKFS-fs (md0): mounted filesystem with ordered data mode. quota=off. Opts: 
Jun 10 08:44:13 emds1 kernel: Lustre: echo-MDT0000: used disk, loading
Jun 10 08:44:14 emds1 kernel: Lustre: MGS: non-config logname received: params
Jun 10 08:44:14 emds1 kernel: Lustre: Skipped 1 previous similar message
Jun 10 08:44:14 emds1 kernel: LustreError: 11-0: echo-MDT0000-lwp-MDT0000: Communicating with 0@lo, operation mds_connect failed with -11.
Jun 10 08:44:14 emds1 kernel: LustreError: 94030:0:(mdt_handler.c:6274:mdt_iocontrol()) echo-MDT0000: Aborting recovery for device
Jun 10 08:44:19 emds1 kernel: Lustre: MGS: non-config logname received: params
Jun 10 08:44:19 emds1 kernel: Lustre: Skipped 1 previous similar message&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We&apos;re running some large rsyncs onto it now but I expect more issues of course.&lt;/p&gt;</comment>
                            <comment id="155397" author="pjones" created="Fri, 10 Jun 2016 17:20:42 +0000"  >&lt;p&gt;Yang Sheng&lt;/p&gt;

&lt;p&gt;Could you please assist with this issue?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="155398" author="adilger" created="Fri, 10 Jun 2016 17:21:49 +0000"  >&lt;p&gt;It looks like you will need to run &quot;e2fsck -fy&quot; again to repair the corrupted directory.  I&apos;m not sure why e2fsck aborted, I haven&apos;t seen a problem like that before. &lt;/p&gt;</comment>
                            <comment id="155510" author="sfw" created="Mon, 13 Jun 2016 16:22:16 +0000"  >&lt;p&gt;The e2fsck seemed to stop at the same place again:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;emds1 /root # e2fsck -fvy -C 0 /dev/md0
e2fsck 1.42.13.wc5 (15-Apr-2016)
echo-MDT0000: recovering journal
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Illegal inode number passed to ext2fs_test_inode_bitmap #0 for in-use inode map
Internal error: couldn&apos;t find dir_info for 3040644673.
e2fsck: aborted&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Any suggestions...?&lt;/p&gt;</comment>
                            <comment id="155515" author="sfw" created="Mon, 13 Jun 2016 16:41:47 +0000"  >&lt;p&gt;We&apos;re quickly reaching the point where we&apos;re going to have to consider clearing things and starting from scratch which is something we&apos;d really rather not do.  We&apos;d appreciate any other options you can present.  If remote access would be useful, we can provide that.&lt;/p&gt;

&lt;p&gt;If clearing things is the only real option here, can we provide any extra info to determine why this might have happened?  As far as we can tell, the only thing that happened was the MDT filling up.&lt;/p&gt;</comment>
                            <comment id="155544" author="pjones" created="Mon, 13 Jun 2016 17:46:04 +0000"  >&lt;p&gt;Stephen&lt;/p&gt;

&lt;p&gt;How long would it take to get remote access in place? I know that you are based in the UK so what hours could a contact on site be available to work in realtime with one of our engineers?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="155545" author="sfw" created="Mon, 13 Jun 2016 17:49:59 +0000"  >&lt;p&gt;We can get that in place.  We&apos;re actually in Vancouver so Pacific hours.  I&apos;ll set up some SSH and get back to you.  Do you have an incoming IP I can restrict this to?&lt;/p&gt;

&lt;p&gt;Please mail me on sfw@dneg.com - Thanks.&lt;/p&gt;</comment>
                            <comment id="155549" author="pjones" created="Mon, 13 Jun 2016 18:00:54 +0000"  >&lt;p&gt;Stephen&lt;/p&gt;

&lt;p&gt;I&apos;ve sent you an email to get this going&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="155575" author="adilger" created="Mon, 13 Jun 2016 23:57:33 +0000"  >&lt;p&gt;Looking into this issue, it appears that the MDT filesystem is becoming full because of the use of hard-link trees for backup.  This results in each file having a large &lt;tt&gt;link&lt;/tt&gt; xattr that spills into an external block, as well as multiple directories referencing each file.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Inode count:              3079569408
Free inodes:              2166646521  == 912922887 files used
Block count:              1539776448
Free blocks:              41729879 == 159GB free, 5992186276 KB used == 5714 GB used, 6721 bytes used/inode
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It would normally not be possible to have more than about 4500 bytes used per inode, even with the external xattr block, but the extra directory trees are consuming this space.&lt;/p&gt;

&lt;p&gt;A file taken at random has a link count of 10 and an external xattr block (the &lt;tt&gt;File ACL&lt;/tt&gt; block):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;debugfs:  stat F_006_fx_smokep
F_006_fx_smokep: File not found by ext2_lookup
debugfs:  stat F_006_fx_smokeplumes_0010_comp_v001_01.nk
Inode: 2467900774   Type: regular    Mode:  0666   Flags: 0x0
Generation: 3837434978    Version: 0x00000014:f756e940
User:     0   Group:     0   Size: 0
File ACL: 1234063101    Directory ACL: 0
Links: 10   Blockcount: 8
Fragment:  Address: 0    Number: 0    Size: 0
 ctime: 0x57541f8e:00000000 -- Sun Jun  5 05:48:14 2016
 atime: 0x571b5195:00000000 -- Sat Apr 23 03:42:29 2016
 mtime: 0x539eb883:00000000 -- Mon Jun 16 02:27:31 2014
crtime: 0x571b5195:7f0d72f8 -- Sat Apr 23 03:42:29 2016
Size of extra inode fields: 28
Extended attributes stored in inode body:
  lma = &quot;00 00 00 00 00 00 00 00 40 66 00 00 02 00 00 00 81 b6 01 00 00 00 00 00
 &quot; (24)
  lma: fid=[0x200006640:0x1b681:0x0] compat=0 incompat=0
  lov = &quot;d0 0b d1 0b 01 00 00 00 81 b6 01 00 00 00 00 00 40 66 00 00 02 00 00 00
 00 00 10 00 01 00 00 00 6c 7c 0e 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0
0 00 56 00 00 00 &quot; (56)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;As for the e2fsck problem, I haven&apos;t been able to debug it yet because the problem takes several hours to hit.  In the meantime, I&apos;ve fixed the (first) problem that was causing the MDT to be remounted read-only:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Jun  8 00:56:41 emds1 kernel: LDISKFS-fs error (device md0): ldiskfs_mb_check_on
disk_bitmap: on-disk bitmap for group 0 corrupted: 57 blocks free in bitmap, 6 in gd
Jun  8 00:56:41 emds1 kernel: 
Jun  8 00:56:41 emds1 kernel: Aborting journal on device md0-8.
Jun  8 00:56:41 emds1 kernel: LDISKFS-fs (md0): Remounting filesystem read-only
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;I manually marked all of the blocks in group 0 used, and changed the group summary to match, as well as recomputed the block group checksum, which is the safest workaround given that I don&apos;t know which block(s) are actually in use, or which of those values is correct.  It isn&apos;t clear if there are more errors like this, but I verified the next few groups had consistent block counts in the bitmap and group descriptors.&lt;/p&gt;

&lt;p&gt;There is an e2fsck read-only check running under GDB to hopefully be able to debug the problem, but it will take about 8h to hit the point of the prior corruption.&lt;/p&gt;</comment>
                            <comment id="155634" author="ys" created="Tue, 14 Jun 2016 05:45:35 +0000"  >&lt;p&gt;Look into e2fsck code, It is failed at:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;
int ext2fs_test_inode_bitmap_range(ext2fs_inode_bitmap bitmap,
                                   ino_t inode, int num)
{
        EXT2_CHECK_MAGIC(bitmap, EXT2_ET_MAGIC_INODE_BITMAP);
        if ((inode &amp;lt; bitmap-&amp;gt;start) || (inode+num-1 &amp;gt; bitmap-&amp;gt;real_end)) {
                ext2fs_warn_bitmap(EXT2_ET_BAD_INODE_TEST,
                                   inode, bitmap-&amp;gt;description);
                return 0;
        }
        return ext2fs_test_clear_generic_bitmap_range((ext2fs_generic_bitmap)
                                                      bitmap, inode, num);
}

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So looks like metadata is inconsistent. Maybe will passed after manual fixed.&lt;/p&gt;</comment>
                            <comment id="155636" author="adilger" created="Tue, 14 Jun 2016 06:32:25 +0000"  >&lt;p&gt;The &lt;tt&gt;ext2fs_warn_bitmap()&lt;/tt&gt; function is the source of the message &lt;tt&gt;Illegal inode number passed to ext2fs_test_inode_bitmap #0 for in-use inode map&lt;/tt&gt;, but it turns out that this isn&apos;t the reason the e2fsck was aborted since in the first run this message appeared some time before e2fsck was aborted.  That is caused by the later error &lt;tt&gt;Internal error: couldn&apos;t find dir_info for 3040644673&lt;/tt&gt;, which was one of the inodes repaired in a previous run.&lt;/p&gt;

&lt;p&gt;The &lt;tt&gt;Internal error&lt;/tt&gt; message did appear for a different inode the first time, so it does seem possible that e2fsck is still repairing these inodes in each run, but not as well as they should be, causing an abort each run.&lt;/p&gt;</comment>
                            <comment id="155881" author="adilger" created="Thu, 16 Jun 2016 04:49:09 +0000"  >&lt;p&gt;There were a couple of patches landed for 2.8.0 that make ldiskfs more robust in the face of similar corruption:&lt;/p&gt;

&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;&lt;a href=&quot;http://review.whamcloud.com/16679&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16679&lt;/a&gt; &quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1026&quot; title=&quot;ldiskfs_mb_check_ondisk_bitmap: on-disk bitmap for group 23828 corrupted&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1026&quot;&gt;&lt;del&gt;LU-1026&lt;/del&gt;&lt;/a&gt; ldiskfs: make bitmaps corruption not fatal&quot;&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;http://review.whamcloud.com/16312&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16312&lt;/a&gt; &quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7114&quot; title=&quot;ldiskfs: corrupted bitmaps handling patches&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7114&quot;&gt;&lt;del&gt;LU-7114&lt;/del&gt;&lt;/a&gt; ldiskfs: corrupted bitmaps handling patches&quot;&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="156022" author="adilger" created="Thu, 16 Jun 2016 23:57:57 +0000"  >&lt;p&gt;The problematic directory inode is &lt;tt&gt;3040644673&lt;/tt&gt; as reported by e2fsck and hasn&apos;t been fixed after several runs:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;debugfs:  stat &amp;lt;3040644673&amp;gt;
Inode: 3040644673   Type: directory    Mode:  0777   Flags: 0x0
Generation: 2336434627    Version: 0x00000012:75b668cd
User:   518   Group:    20   Size: 4096
File ACL: 0    Directory ACL: 0
Links: 2   Blockcount: 8
Fragment:  Address: 0    Number: 0    Size: 0
 ctime: 0x57421268:00000000 -- Sun May 22 13:11:20 2016
 atime: 0x57421268:00000000 -- Sun May 22 13:11:20 2016
 mtime: 0x56b2143f:00000000 -- Wed Feb  3 06:52:47 2016
crtime: 0x57421268:888eaf54 -- Sun May 22 13:11:20 2016
Size of extra inode fields: 28
Extended attributes stored in inode body:
  lma = &quot;00 00 00 00 00 00 00 00 b5 83 00 00 02 00 00 00 df a8 00 00 00 00 00 00
 &quot; (24)
  lma: fid=[0x2000083b5:0xa8df:0x0] compat=0 incompat=0
  link = &quot;df f1 ea 11 01 00 00 00 31 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0
0 00 19 00 00 00 02 00 00 83 b5 00 00 a8 bf 00 00 00 00 61 63 6c 6f 63 61 6c &quot; (
49)
BLOCKS:
(0):14090
TOTAL: 1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Dumping the directory block shows nothing completely broken, except that the parent entry &quot;..&quot; is inode &amp;lt;2&amp;gt;, which is the ext4 root inode that should never be used by Lustre directories, but shouldn&apos;t cause any problems for e2fsck:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;debugfs:  block_dump 14090
0000  418e 3cb5 0c00 0102 2e00 0000 0200 0000  A.&amp;lt;.............
0020  f40f 0202 2e2e 0000 2537 0000 1a37 0000  ........%7...7..
0040  2037 0000 2f37 0000 2e37 0000 2637 0000   7../7...7..&amp;amp;7..
0060  2d37 0000 0f37 0000 2737 0000 1337 0000  -7...7..&apos;7...7..
0100  1237 0000 1b37 0000 1e37 0000 2c37 0000  .7...7...7..,7..
0120  3037 0000 0000 0000 1f37 0000 0d37 0000  07.......7...7..
0140  3137 0000 1837 0000 3437 0000 1037 0000  17...7..47...7..
0160  2937 0000 3237 0000 3337 0000 1d37 0000  )7..27..37...7..
0200  1137 0000 3537 0000 2b37 0000 3637 0000  .7..57..+7..67..
0220  2837 0000 3737 0000 0000 0000 0000 0000  (7..77..........
0240  0000 0000 0000 0000 0000 0000 0000 0000  ................
*
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Even listing the directory shows nothing wrong with it except the &quot;..&quot; directory number:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;debugfs:  ls &amp;lt;3040644673&amp;gt;
 3040644673  (12) .    2  (4084) ..
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The code that is triggering is in &lt;tt&gt;e2fsck/pass2.c::check_dir_block()&lt;/tt&gt;, where &lt;tt&gt;e2fsck_dir_info_get_parent()&lt;/tt&gt; fails:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;                        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (e2fsck_dir_info_get_parent(ctx, dirent-&amp;gt;inode,
                                                       &amp;amp;subdir_parent)) {
                                cd-&amp;gt;pctx.ino = dirent-&amp;gt;inode;
                                fix_problem(ctx, PR_2_NO_DIRINFO, &amp;amp;cd-&amp;gt;pctx);
                                &lt;span class=&quot;code-keyword&quot;&gt;goto&lt;/span&gt; abort_free_dict;
                        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;after which e2fsck immediately aborts.&lt;/p&gt;

&lt;p&gt;I&apos;ll need to dig into the population of the dir_info list to see why it is choking on &quot;2&quot; as the parent, since every directory under the root &lt;tt&gt;&quot;/&quot;&lt;/tt&gt; directory also has &amp;lt;2&amp;gt; as the parent.  Maybe that is special-cased in the code?  Another possibility isn&apos;t that the parent directory &quot;2&quot; is the problem, but rather that  this directory is not connected anywhere?  I&apos;d think that would be handled by adding it to lost+found or similar.  It may be possible to debug this by creating a test filesystem that is similarly corrupted (non-root, maybe disconnected directory with &amp;lt;2&amp;gt; as the parent) to see what it is that triggers e2fsck to be unhappy.&lt;/p&gt;</comment>
                            <comment id="156038" author="bobijam" created="Fri, 17 Jun 2016 10:50:22 +0000"  >&lt;p&gt;I&apos;ve tried to created a /mnt/lustre/dir1/dir2 which is disconnected, and e2fsck reports this&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# e2fsck -fvy /dev/sdb
e2fsck 1.42.3.wc3 (15-Aug-2012)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Unconnected directory inode 25047 (/???)
Connect to /lost+found? yes

Pass 4: Checking reference counts
Inode 2 ref count is 12, should be 13.  Fix? yes

Inode 25047 ref count is 3, should be 2.  Fix? yes

Pass 5: Checking group summary information

lustre-MDT0000: ***** FILE SYSTEM WAS MODIFIED *****

     246 inodes used (0.25%)
       3 non-contiguous files (1.2%)
       0 non-contiguous directories (0.0%)
         # of inodes with ind/dind/tind blocks: 0/0/0
   17038 blocks used (34.08%)
       0 bad blocks
       1 large file

     119 regular files
     118 directories
       0 character device files
       0 block device files
       0 fifos
4294967295 links
       0 symbolic links (0 fast symbolic links)
       0 sockets
--------
     236 files
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It does not complain anything in the pass2.&lt;/p&gt;</comment>
                            <comment id="156105" author="adilger" created="Fri, 17 Jun 2016 23:37:23 +0000"  >&lt;p&gt;I tried to create a two-level subdirectory &lt;tt&gt;test/test2&lt;/tt&gt; and then zeroed out the leaf block of &lt;tt&gt;test1&lt;/tt&gt; so that there was no entry pointing to &lt;tt&gt;test2&lt;/tt&gt; and also set the &lt;tt&gt;&quot;..&quot;&lt;/tt&gt; entry of &lt;tt&gt;test2&lt;/tt&gt; to &amp;lt;2&amp;gt;, but this didn&apos;t show anything different.  Another test with just changing &quot;..&quot; of the &lt;tt&gt;test3&lt;/tt&gt; subdirectory to &amp;lt;2&amp;gt; didn&apos;t cause the problem.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;e2fsck 1.42.13.wc5 (15-Apr-2016)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Directory inode 12, block #0, offset 0: directory corrupted
Salvage&amp;lt;y&amp;gt;? yes
Missing &apos;.&apos; in directory inode 12.
Fix&amp;lt;y&amp;gt;? yes
Setting filetype for entry &apos;.&apos; in ??? (12) to 2.
Missing &apos;..&apos; in directory inode 12.
Fix&amp;lt;y&amp;gt;? yes
Setting filetype for entry &apos;..&apos; in ??? (12) to 2.
Pass 3: Checking directory connectivity
&apos;..&apos; in /test (12) is &amp;lt;The NULL inode&amp;gt; (0), should be / (2).
Fix&amp;lt;y&amp;gt;? yes
Unconnected directory inode 13 (/test/???)
Connect to /lost+found&amp;lt;y&amp;gt;? yes
&apos;..&apos; in /test2/test3 (15) is / (2), should be /test2 (14).
Fix&amp;lt;y&amp;gt;? yes
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt; I guess it is worthwhile to look into how directories are inserted into the parent directory list for &lt;tt&gt;e2fsck_dir_info_get_parent()&lt;/tt&gt; to find.  Something is preventing the &lt;tt&gt;3040644673&lt;/tt&gt; inode from being found in the parent directory list.&lt;/p&gt;</comment>
                            <comment id="182246" author="daire" created="Thu, 26 Jan 2017 13:40:31 +0000"  >&lt;p&gt;So it seems like this was eventually resolved (we have not had further issues), but I do have a related question.&lt;/p&gt;

&lt;p&gt;If we were to build this filesystem again, what would be the optimal inode size/ratio for performance considering we are likely to have 10+ hard links per file? Or does it not really matter and just ensuring we have more space available would be sufficient?&lt;/p&gt;</comment>
                            <comment id="182287" author="adilger" created="Thu, 26 Jan 2017 17:54:02 +0000"  >&lt;p&gt;Daire, if you have a large number of links per file, then increasing the amount of space per inode would improve your space efficiency somewhat.  Something like &lt;tt&gt;&amp;#45;&amp;#45;mkfsoptions=&quot;&amp;#45;i 4608&quot;&lt;/tt&gt; would provide room for one external xattr block per file&lt;/p&gt;</comment>
                            <comment id="182381" author="daire" created="Fri, 27 Jan 2017 14:07:38 +0000"  >&lt;p&gt;Okay, thanks. We&apos;ll give that a try on our next filesystem. I wonder how that will interact with a RAID stripe of 4k too? But then we are using NVMe to back MDTs now so I doubt there is much RAID overhead any more.&lt;/p&gt;</comment>
                            <comment id="182837" author="adilger" created="Tue, 31 Jan 2017 20:15:43 +0000"  >&lt;p&gt;The LBUG was fixed as part of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6696&quot; title=&quot;ASSERTION( rc == 0 || rc == LLOG_PROC_BREAK ) failed: 0 changes, 0 in progress, 0 in flight: -5&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6696&quot;&gt;&lt;del&gt;LU-6696&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;As for Lustre + 4KB RAID chunk size, I don&apos;t think the MDT will really affect performance much, since it is (presumably) configured as RAID-1+0 and not RAID-6.&lt;/p&gt;</comment>
                            <comment id="182912" author="daire" created="Wed, 1 Feb 2017 17:00:51 +0000"  >&lt;p&gt;Well, I was toying with the idea of using a 3 x NVMe (6.4TB) RAID5 for the MDT... We need lots of inodes and some redundancy. Maybe I should just use 4 cards and RAID10.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="12976">LU-1026</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="30548">LU-6696</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="31980">LU-7114</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="21886" name="dir.3040644673.bin" size="4096" author="sfw" created="Fri, 17 Jun 2016 00:35:21 +0000"/>
                            <attachment id="21887" name="inode.3040644673.bin" size="4096" author="sfw" created="Fri, 17 Jun 2016 00:35:21 +0000"/>
                            <attachment id="21828" name="vmcore-dmesg.20160607.txt" size="141377" author="cyb" created="Wed, 8 Jun 2016 22:13:51 +0000"/>
                            <attachment id="21829" name="vmcore-dmesg.20160608.txt" size="160881" author="cyb" created="Wed, 8 Jun 2016 22:13:51 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzye5j:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>