<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:44:39 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11528] sanity-lfsck test_11a: soft lockup - CPU#0 stuck for 22s</title>
                <link>https://jira.whamcloud.com/browse/LU-11528</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for wangshilong &amp;lt;wshilong@ddn.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/0081d858-d128-11e8-ad90-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/0081d858-d128-11e8-ad90-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_11a failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;trevis-3vm10 crashed during sanity-lfsck test_11a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;[ 8560.259642] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! &lt;span class=&quot;error&quot;&gt;&amp;#91;tx_commit_cb:17212&amp;#93;&lt;/span&gt;&lt;br/&gt;
[ 8560.260765] Modules linked in: osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgc(OE) osd_zfs(OE) lquota(OE) fid(OE) fld(OE) ksocklnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod crc_t10dif crct10dif_generic ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core sunrpc dm_mod zfs(POE) zunicode(POE) zavl(POE) icp(POE) iosf_mbi crc32_pclmul ghash_clmulni_intel zcommon(POE) znvpair(POE) spl(OE) ppdev aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev virtio_balloon i2c_piix4 i2c_core parport_pc parport ip_tables ata_generic pata_acpi ext4 mbcache jbd2 virtio_blk ata_piix&lt;br/&gt;
[ 8560.271741]  8139too crct10dif_pclmul crct10dif_common libata crc32c_intel virtio_pci 8139cp serio_raw virtio_ring virtio mii floppy&lt;br/&gt;
[ 8560.273464] CPU: 0 PID: 17212 Comm: tx_commit_cb Kdump: loaded Tainted: P           OE  ------------   3.10.0-862.14.4.el7_lustre.x86_64 #1&lt;br/&gt;
[ 8560.275047] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011&lt;br/&gt;
[ 8560.275786] task: ffff8cc8e51dcf10 ti: ffff8cc8d7594000 task.ti: ffff8cc8d7594000&lt;br/&gt;
[ 8560.276737] RIP: 0010:&lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0d03b69&amp;gt;&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0d03b69&amp;gt;&amp;#93;&lt;/span&gt; dt_txn_hook_commit+0x49/0x60 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[ 8560.278114] RSP: 0018:ffff8cc8d7597cc8  EFLAGS: 00000246&lt;br/&gt;
[ 8560.278800] RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff8cc8df5a6000&lt;br/&gt;
[ 8560.279713] RDX: ffff8cc8df5a6048 RSI: 0000000000000000 RDI: ffff8cc8d83dba00&lt;br/&gt;
[ 8560.280628] RBP: ffff8cc8d7597cd8 R08: ffff8cc8df18f020 R09: 0000000000000001&lt;br/&gt;
[ 8560.281543] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8cc8ffc18bc0&lt;br/&gt;
[ 8560.282456] R13: ffff8cc8e51dcf78 R14: 0000000100010700 R15: ffff8cc8df555c00&lt;br/&gt;
[ 8560.283369] FS:  0000000000000000(0000) GS:ffff8cc8ffc00000(0000) knlGS:0000000000000000&lt;br/&gt;
[ 8560.284416] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033&lt;br/&gt;
[ 8560.285160] CR2: 00007fc5a1b15000 CR3: 000000000f20e000 CR4: 00000000000606f0&lt;br/&gt;
[ 8560.286082] Call Trace:&lt;br/&gt;
[ 8560.286457]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc116b788&amp;gt;&amp;#93;&lt;/span&gt; osd_trans_commit_cb+0xe8/0x480 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
[ 8560.287462]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc04fca98&amp;gt;&amp;#93;&lt;/span&gt; dmu_tx_do_callbacks+0x48/0x70 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
[ 8560.288336]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0547724&amp;gt;&amp;#93;&lt;/span&gt; txg_do_callbacks+0x14/0x30 &lt;span class=&quot;error&quot;&gt;&amp;#91;zfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
[ 8560.289169]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc03ddd2c&amp;gt;&amp;#93;&lt;/span&gt; taskq_thread+0x2ac/0x4f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
[ 8560.289981]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff932d2010&amp;gt;&amp;#93;&lt;/span&gt; ? wake_up_state+0x20/0x20&lt;br/&gt;
[ 8560.290715]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc03dda80&amp;gt;&amp;#93;&lt;/span&gt; ? taskq_thread_spawn+0x60/0x60 &lt;span class=&quot;error&quot;&gt;&amp;#91;spl&amp;#93;&lt;/span&gt;&lt;br/&gt;
[ 8560.291584]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff932bdf21&amp;gt;&amp;#93;&lt;/span&gt; kthread+0xd1/0xe0&lt;br/&gt;
[ 8560.292220]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff932bde50&amp;gt;&amp;#93;&lt;/span&gt; ? insert_kthread_work+0x40/0x40&lt;br/&gt;
[ 8560.293026]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff939255f7&amp;gt;&amp;#93;&lt;/span&gt; ret_from_fork_nospec_begin+0x21/0x21&lt;br/&gt;
[ 8560.293870]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff932bde50&amp;gt;&amp;#93;&lt;/span&gt; ? insert_kthread_work+0x40/0x40&lt;br/&gt;
[ 8560.294660] Code: 51 48 48 39 d0 48 8d 58 d8 74 31 0f 1f 80 00 00 00 00 48 8b 43 10 48 85 c0 74 10 48 8b 73 18 4c 89 e7 e8 5b b9 85 d2 49 8b 0c 24 &amp;lt;48&amp;gt; 8b 43 28 48 8d 51 48 48 39 d0 48 8d 58 d8 75 d6 5b 41 5c 5d &lt;br/&gt;
[ 8560.298709] Kernel panic - not syncing: softlockup: hung tasks&lt;/p&gt;





&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
sanity-lfsck test_11a - trevis-3vm10 crashed during sanity-lfsck test_11a&lt;/p&gt;</description>
                <environment></environment>
        <key id="53615">LU-11528</key>
            <summary>sanity-lfsck test_11a: soft lockup - CPU#0 stuck for 22s</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>zfs</label>
                    </labels>
                <created>Tue, 16 Oct 2018 15:12:31 +0000</created>
                <updated>Tue, 5 Nov 2019 05:33:34 +0000</updated>
                            <resolved>Sat, 15 Dec 2018 18:12:30 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="234987" author="wshilong" created="Tue, 16 Oct 2018 15:13:59 +0000"  >&lt;p&gt;One possibly related fix:&lt;/p&gt;

&lt;p&gt;commit 823d48bfb182137c53b9432498f1f0564eaa8bfc&lt;br/&gt;
Author: lidongyang &amp;lt;gnaygnodil@gmail.com&amp;gt;&lt;br/&gt;
Date:   Sat Dec 23 05:19:51 2017 +1100&lt;br/&gt;
    Call commit callbacks from the tail of the list&lt;/p&gt;

&lt;p&gt;    Our zfs backed Lustre MDT had soft lockups while under heavy metadata&lt;br/&gt;
    workloads while handling transaction callbacks from osd_zfs.&lt;/p&gt;

&lt;p&gt;    The problem is zfs is not taking advantage of the fast path in&lt;br/&gt;
    Lustre&apos;s trans callback handling, where Lustre will skip the calls&lt;br/&gt;
    to ptlrpc_commit_replies() when it already saw a higher transaction&lt;br/&gt;
    number.&lt;/p&gt;

&lt;p&gt;    This patch corrects this, it also has a positive impact on metadata&lt;br/&gt;
    performance on Lustre with osd_zfs, plus some cleanup in the headers.&lt;/p&gt;

&lt;p&gt;    A similar issue for ext4/ldiskfs is described on:   &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6527&quot; title=&quot;Journal commit callback opitmization&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6527&quot;&gt;&lt;del&gt;LU-6527&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;    Reviewed-by: Olaf Faaland &amp;lt;faaland1@llnl.gov&amp;gt;&lt;br/&gt;
    Reviewed-by: Brian Behlendorf &amp;lt;behlendorf1@llnl.gov&amp;gt;&lt;br/&gt;
    Signed-off-by: Li Dongyang &amp;lt;dongyang.li@anu.edu.au&amp;gt;&lt;br/&gt;
    Closes #6986&lt;/p&gt;

&lt;p&gt;And the above fix has been released since zfs-0.8.0-rc1.&lt;/p&gt;</comment>
                            <comment id="238657" author="adilger" created="Sat, 15 Dec 2018 17:54:09 +0000"  >&lt;p&gt;This patch was landed to the &lt;tt&gt;zfs-0.7-release&lt;/tt&gt; branch as &lt;tt&gt;zfs-0.7.5-18-g8d82a19de&lt;/tt&gt;, so it should already be included in our testing.&lt;/p&gt;</comment>
                            <comment id="238658" author="adilger" created="Sat, 15 Dec 2018 17:55:06 +0000"  >&lt;p&gt;+1 &lt;a href=&quot;https://testing.whamcloud.com/test_sets/b4abcd02-0044-11e9-93ea-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/b4abcd02-0044-11e9-93ea-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="238660" author="adilger" created="Sat, 15 Dec 2018 18:12:30 +0000"  >&lt;p&gt;This is the same as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9706&quot; title=&quot;conf-sanity test_53a: MDS soft lockup - CPU#0 stuck for 22s in osd_trans_commit_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9706&quot;&gt;&lt;del&gt;LU-9706&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="46839">LU-9706</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i004an:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>