<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:34:34 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-17332] sanity test_820: kernel BUG at fs/jbd2/transaction.c:378</title>
                <link>https://jira.whamcloud.com/browse/LU-17332</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Andreas Dilger &amp;lt;adilger@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/58a6b07c-fb1f-4a2d-ac3c-d7578d6b134f&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/58a6b07c-fb1f-4a2d-ac3c-d7578d6b134f&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_820 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;trevis-28vm3 crashed during sanity test_820

[26282.338565] Lustre: server umount lustre-OST0004 complete
[26282.411017] ------------[ cut here ]------------
[26282.412061] kernel BUG at fs/jbd2/transaction.c:378!
[26282.413171] invalid opcode: 0000 [#1] SMP PTI
[26282.414068] CPU: 1 PID: 784404 Comm: kworker/1:5 4.18.0-477.15.1.el8_lustre.x86_64 #1
[26282.416473] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[26282.435639] Call Trace:
[26282.438083]  jbd2__journal_start+0xee/0x1f0 [jbd2]
[26282.439047]  jbd2_journal_start+0x19/0x20 [jbd2]
[26282.439979]  flush_stashed_stats_work+0x36/0x90 [ldiskfs]
[26282.441086]  process_one_work+0x1a7/0x360
[26282.442753]  worker_thread+0x30/0x390
[26282.444311]  kthread+0x134/0x150
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Test session details:&lt;br/&gt;
clients: &lt;a href=&quot;https://build.whamcloud.com/job/lustre-master/4445&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://build.whamcloud.com/job/lustre-master/4445&lt;/a&gt; - 4.18.0-477.15.1.el8_8.x86_64&lt;br/&gt;
servers: &lt;a href=&quot;https://build.whamcloud.com/job/lustre-master/4445&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://build.whamcloud.com/job/lustre-master/4445&lt;/a&gt; - 4.18.0-477.15.1.el8_lustre.x86_64&lt;/p&gt;

&lt;p&gt;This started around 2023-07-21 +/- 7 days.  It looks like the workqueue is somehow running &lt;b&gt;after&lt;/b&gt; the journal is cleaned up, since the BUG is the JBD2_UNMOUNT check in start_this_handle(), and that flag is set via jbd2_journal_destroy():&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
&lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; jbd2_journal_destroy(journal_t *journal)
{       
        &lt;span class=&quot;code-comment&quot;&gt;/* Wait &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; the commit thread to wake up and die. */&lt;/span&gt;
        journal_kill_thread(journal);
        :
}

&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; void journal_kill_thread(journal_t *journal)
{               
        journal-&amp;gt;j_flags |= JBD2_UNMOUNT;
        :
}

&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; start_this_handle(journal_t *journal, handle_t *handle,
                             gfp_t gfp_mask)
{
        :
        BUG_ON(journal-&amp;gt;j_flags &amp;amp; JBD2_UNMOUNT);
        :
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;





&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
sanity test_820 - trevis-28vm3 crashed during sanity test_820&lt;/p&gt;</description>
                <environment></environment>
        <key id="79310">LU-17332</key>
            <summary>sanity test_820: kernel BUG at fs/jbd2/transaction.c:378</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="dongyang">Dongyang Li</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Mon, 4 Dec 2023 08:56:13 +0000</created>
                <updated>Wed, 3 Jan 2024 14:20:01 +0000</updated>
                            <resolved>Wed, 3 Jan 2024 14:20:01 +0000</resolved>
                                    <version>Lustre 2.16.0</version>
                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="396100" author="adilger" created="Fri, 8 Dec 2023 21:10:45 +0000"  >&lt;p&gt;I pushed patch &lt;a href=&quot;https://review.whamcloud.com/53242&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/53242&lt;/a&gt; &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16032&quot; title=&quot; Truncate for large objects can lead to a thread hung&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16032&quot;&gt;&lt;del&gt;LU-16032&lt;/del&gt;&lt;/a&gt; osd-ldiskfs: track backlog of unlinked objects&lt;/tt&gt;&quot; to see if I can flush the workqueue at unmount time, but it doesn&apos;t seem to be quite correct.  It seems to make the unmount worse, adding a crash in lu_env handling, possibly because the accounting is done in the OSD device instead of the superblock.&lt;/p&gt;</comment>
                            <comment id="397013" author="pjones" created="Fri, 15 Dec 2023 18:16:05 +0000"  >&lt;p&gt;Dongyang&lt;/p&gt;

&lt;p&gt;Could you please investigate?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="397159" author="dongyang" created="Mon, 18 Dec 2023 04:44:19 +0000"  >&lt;p&gt;The test failure referred to by the ticket was on 2023-07-21 &lt;a href=&quot;https://testing.whamcloud.com/test_sets/58a6b07c-fb1f-4a2d-ac3c-d7578d6b134f&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/58a6b07c-fb1f-4a2d-ac3c-d7578d6b134f&lt;/a&gt;&lt;br/&gt;
And I believe it&apos;s because of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16982&quot; title=&quot;Crash lustre after umount -d -f /mnt/lustre-mds4&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16982&quot;&gt;&lt;del&gt;LU-16982&lt;/del&gt;&lt;/a&gt;: the superblock update was scheduled after journal destroy. Since the patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16982&quot; title=&quot;Crash lustre after umount -d -f /mnt/lustre-mds4&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16982&quot;&gt;&lt;del&gt;LU-16982&lt;/del&gt;&lt;/a&gt; landed in Aug, we should not see this anymore, and I just did a grep on the trevis dump files to verify this.&lt;/p&gt;

&lt;p&gt;However, I did find a similar crash happening recently on el9 servers; the trace looks like this:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[ 9096.356946] Lustre: DEBUG MARKER: umount -d -f /mnt/lustre-ost2
[ 9097.522537] LDISKFS-fs (dm-9): unmounting filesystem.
[ 9098.085442] ------------[ cut here ]------------
[ 9098.086099] kernel BUG at fs/jbd2/transaction.c:384!
[ 9098.086804] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[ 9098.087460] CPU: 0 PID: 316553 Comm: kworker/0:2 Kdump: loaded Tainted: G           OE    --------  ---  5.14.0-284.30.1_lustre.el9.x86_64 #1
[ 9098.088869] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 9098.089558] Workqueue: events flush_stashed_error_work [ldiskfs]
[ 9098.090478] RIP: 0010:start_this_handle+0x234/0x580 [jbd2]
[ 9098.091187] Code: 40 48 89 44 24 48 41 83 79 0c 02 74 1f 0f 0b 48 89 ef e8 5f 1c 67 e7 48 89 ef e8 67 1e 67 e7 49 8b 06 a8 01 0f 84 77 fe ff ff &amp;lt;0f&amp;gt; 0b 4d 8d be 90 00 00 00 ba 02 00 00 00 48 8d 74 24 28 4c 89 ff
[ 9098.093212] RSP: 0018:ffffb92607a37d90 EFLAGS: 00010202
[ 9098.093837] RAX: 0000000000000031 RBX: ffff92b1f5e6ba48 RCX: 00000000000000e0
[ 9098.094663] RDX: 0000000000008d40 RSI: ffffffffc06c3bf8 RDI: ffff92b1e6e3884c
[ 9098.095493] RBP: ffff92b1e6e3884c R08: 0000000000000100 R09: 0000000000000000
[ 9098.096334] R10: ffff92b220ddf900 R11: 0000000000000106 R12: ffff92b220ddf900
[ 9098.097167] R13: 0000000000000000 R14: ffff92b1e6e38800 R15: 0000000000000001
[ 9098.097995] FS:  0000000000000000(0000) GS:ffff92b27cc00000(0000) knlGS:0000000000000000
[ 9098.098933] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 9098.099609] CR2: 000055c0889ae840 CR3: 000000008ed7a005 CR4: 00000000000606f0
[ 9098.100438] Call Trace:
[ 9098.100782]  &amp;lt;TASK&amp;gt;
[ 9098.101098]  ? kmem_cache_alloc+0x17d/0x300
[ 9098.101661]  jbd2__journal_start+0xfb/0x200 [jbd2]
[ 9098.102254]  jbd2_journal_start+0x19/0x20 [jbd2]
[ 9098.102866]  flush_stashed_error_work+0x4a/0xf0 [ldiskfs]
[ 9098.103543]  process_one_work+0x1e5/0x3b0
[ 9098.104078]  ? rescuer_thread+0x390/0x390
[ 9098.104578]  worker_thread+0x50/0x3a0
[ 9098.105052]  ? rescuer_thread+0x390/0x390
[ 9098.105549]  kthread+0xd9/0x100
[ 9098.105978]  ? kthread_complete_and_exit+0x20/0x20
[ 9098.106571]  ret_from_fork+0x22/0x30
[ 9098.107085]  &amp;lt;/TASK&amp;gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The reason is that &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16982&quot; title=&quot;Crash lustre after umount -d -f /mnt/lustre-mds4&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16982&quot;&gt;&lt;del&gt;LU-16982&lt;/del&gt;&lt;/a&gt; fixed the problem only for the el8 series, not for el9. I will prepare a patch for this.&lt;/p&gt;</comment>
                            <comment id="397364" author="gerrit" created="Tue, 19 Dec 2023 01:06:40 +0000"  >&lt;p&gt;&quot;Li Dongyang &amp;lt;dongyangli@ddn.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/53499&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/53499&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17332&quot; title=&quot;sanity test_820: kernel BUG at fs/jbd2/transaction.c:378&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17332&quot;&gt;&lt;del&gt;LU-17332&lt;/del&gt;&lt;/a&gt; ldiskfs: do no update superblock after journal destroy&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 63c16ede8ea0075f670039bf087579226da8ca76&lt;/p&gt;</comment>
                            <comment id="398362" author="gerrit" created="Wed, 3 Jan 2024 03:01:33 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/53499/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/53499/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17332&quot; title=&quot;sanity test_820: kernel BUG at fs/jbd2/transaction.c:378&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17332&quot;&gt;&lt;del&gt;LU-17332&lt;/del&gt;&lt;/a&gt; ldiskfs: do no update superblock after journal destroy&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 8cdfa1d50806ff5ef165909956201592e4f11b9b&lt;/p&gt;</comment>
                            <comment id="398415" author="pjones" created="Wed, 3 Jan 2024 14:20:01 +0000"  >&lt;p&gt;Landed for 2.16&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="71291">LU-16032</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i043pb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>