<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:28:31 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-9706] conf-sanity test_53a: MDS soft lockup - CPU#0 stuck for 22s in osd_trans_commit_cb()</title>
                <link>https://jira.whamcloud.com/browse/LU-9706</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for sarah_lw &amp;lt;wei3.liu@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/c25bf48e-53d1-11e7-a749-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/c25bf48e-53d1-11e7-a749-5254006e85c2&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_53a failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;test failed to respond and timed out
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;env: tag-2.9.59 #3603  ZFS&lt;/p&gt;

&lt;p&gt;MDS console&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;05:24:43:[40506.686671] Lustre: MGS: Connection restored to 1a7ddb58-f96d-6d7b-dd19-c36909e007a9 (at 0@lo)
05:24:43:[40506.826682] Lustre: lustre-MDT0000: Imperative Recovery not enabled, recovery window 60-180
05:24:43:[40506.829961] LustreError: 16342:0:(osd_oi.c:503:osd_oid()) lustre-MDT0000-osd: unsupported quota oid: 0x16
05:24:43:[40507.031480] Lustre: DEBUG MARKER: /usr/sbin/lctl get_param -n health_check
05:24:43:[40507.336736] Lustre: DEBUG MARKER: PATH=/usr/lib64/lustre/tests:/usr/lib/lustre/tests:/usr/lib64/lustre/tests:/opt/iozone/bin:/usr/lib64/lustre/tests//usr/lib64/lustre/tests:/usr/lib64/lustre/tests:/usr/lib64/lustre/tests/../utils:/opt/iozone/bin:/usr/lib64/lustre/tests/mpi:/usr/lib64/lust
05:24:43:[40507.892722] Lustre: DEBUG MARKER: /usr/sbin/lctl mark trevis-46vm7.trevis.hpdd.intel.com: executing set_default_debug -1 all 4
05:24:43:[40507.893599] Lustre: DEBUG MARKER: /usr/sbin/lctl mark trevis-46vm7.trevis.hpdd.intel.com: executing set_default_debug -1 all 4
05:24:43:[40508.120605] Lustre: DEBUG MARKER: trevis-46vm7.trevis.hpdd.intel.com: executing set_default_debug -1 all 4
05:24:43:[40508.128355] Lustre: DEBUG MARKER: trevis-46vm7.trevis.hpdd.intel.com: executing set_default_debug -1 all 4
05:24:43:[40508.289854] Lustre: DEBUG MARKER: lctl set_param -n mdt.lustre*.enable_remote_dir=1
05:24:43:[40508.591867] Lustre: DEBUG MARKER: zfs get -H -o value 				lustre:svname lustre-mdt1/mdt1 2&amp;gt;/dev/null | 				grep -E &apos;:[a-zA-Z]{3}[0-9]{4}&apos;
05:24:43:[40508.901939] Lustre: DEBUG MARKER: zfs get -H -o value 				lustre:svname lustre-mdt1/mdt1 2&amp;gt;/dev/null | 				grep -E &apos;:[a-zA-Z]{3}[0-9]{4}&apos;
05:24:43:[40509.212232] Lustre: DEBUG MARKER: zfs get -H -o value lustre:svname 		                           lustre-mdt1/mdt1 2&amp;gt;/dev/null
05:24:43:[40509.520874] Lustre: DEBUG MARKER: lctl set_param -n mdt.lustre*.enable_remote_dir=1
05:24:43:[40511.168162] Lustre: MGS: Connection restored to 00faafbe-57f8-e637-4480-1f8b3bfa5137 (at 10.9.6.35@tcp)
05:24:43:[40511.800039] Lustre: 15064:0:(client.c:2114:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1497677067/real 1497677067]  req@ffff8800363c0300 x1570428225061024/t0(0) o8-&amp;gt;lustre-OST0000-osc-MDT0000@10.9.6.35@tcp:28/4 lens 520/544 e 0 to 1 dl 1497677072 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
05:24:43:[40512.608633] Lustre: DEBUG MARKER: /usr/sbin/lctl mark trevis-46vm8.trevis.hpdd.intel.com: executing set_default_debug -1 all 4
05:25:17:[40512.800854] Lustre: DEBUG MARKER: trevis-46vm8.trevis.hpdd.intel.com: executing set_default_debug -1 all 4
05:25:17:[40513.793184] Lustre: MGS: Connection restored to 7310674c-545b-174e-1751-630f6aaa5a8a (at 10.9.6.28@tcp)
05:25:17:[40513.796051] Lustre: Skipped 2 previous similar messages
05:25:17:[40516.831820] LustreError: 11-0: lustre-OST0000-osc-MDT0000: operation ost_statfs to node 10.9.6.35@tcp failed: rc = -107
05:25:17:[40516.835465] Lustre: lustre-OST0000-osc-MDT0000: Connection to lustre-OST0000 (at 10.9.6.35@tcp) was lost; in progress operations using this service will wait for recovery to complete
05:25:17:[40518.877430] Lustre: DEBUG MARKER: grep -c /mnt/lustre-mds1&apos; &apos; /proc/mounts
05:25:17:[40519.195771] Lustre: DEBUG MARKER: umount -d -f /mnt/lustre-mds1
05:25:17:[40544.062002] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [tx_commit_cb:16414]
05:25:17:
05:25:17:[40544.062002] Modules linked in: osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgs(OE) osd_zfs(OE) lquota(OE) lustre(OE) obdecho(OE) mgc(OE) lov(OE) osc(OE) mdc(OE) lmv(OE) fid(OE) fld(OE) ptlrpc_gss(OE) ptlrpc(OE) obdclass(OE) ksocklnd(OE) lnet(OE) libcfs(OE) zfs(POE) zunicode(POE) zavl(POE) zcommon(POE) znvpair(POE) spl(OE) zlib_deflate dm_mod rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod crc_t10dif crct10dif_generic ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core iosf_mbi crc32_pclmul ghash_clmulni_intel aesni_intel ppdev lrw gf128mul glue_helper ablk_helper cryptd pcspkr virtio_balloon i2c_piix4 parport_pc parport nfsd nfs_acl lockd grace auth_rpcgss sunrpc ip_tables ext4 mbcache jbd2 ata_generic pata_acpi virtio_blk cirrus crct10dif_pclmul crct10dif_common drm_kms_helper syscopyarea sysfillrect crc32c_intel 8139too serio_raw sysimgblt virtio_pci fb_sys_fops virtio_ring ttm virtio ata_piix 8139cp mii drm libata i2c_core floppy [last unloaded: libcfs]
05:25:17:[40544.062002] CPU: 1 PID: 16414 Comm: tx_commit_cb Tainted: P           OE  ------------   3.10.0-514.21.1.el7_lustre.x86_64 #1
05:25:17:[40544.062002] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007
05:25:17:[40544.062002] task: ffff88005d50bec0 ti: ffff88004361c000 task.ti: ffff88004361c000
05:25:17:[40544.062002] RIP: 0010:[&amp;lt;ffffffffa08c4fe4&amp;gt;]  [&amp;lt;ffffffffa08c4fe4&amp;gt;] dt_txn_hook_commit+0x34/0x60 [obdclass]
05:25:17:[40544.062002] RSP: 0018:ffff88004361fd78  EFLAGS: 00000297
05:25:17:[40544.062002] RAX: 0000000000000000 RBX: ffff8800694b94a0 RCX: ffff880069efc000
05:25:17:[40544.062002] RDX: ffff880069efc048 RSI: ffffffffa1287600 RDI: ffff88005e026e00
05:25:17:[40544.062002] RBP: ffff88004361fd88 R08: 20737365636f7250 R09: 0a64657265746e65
05:25:17:[40544.062002] R10: ffff880014dc2535 R11: 0a64657265746e65 R12: 0000000000000000
05:25:17:[40544.062002] R13: ffffffff810ce9ac R14: ffff88004361fd20 R15: 00000000a57bc182
05:25:17:[40544.062002] FS:  0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
05:25:17:[40544.062002] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
05:25:17:[40544.062002] CR2: 00007f23d5a9e000 CR3: 000000007b0d7000 CR4: 00000000000406e0
05:25:17:[40544.062002] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
05:25:17:[40544.062002] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
05:25:17:[40544.062002] Stack:
05:25:17:[40544.062002]  ffff8800694b94a0 ffff88005cbd0600 ffff88004361fdd0 ffffffffa0f676a8
05:25:17:[40544.062002]  ffff880069efc000 ffff88007c0c0000 ffff8800694b94a0 ffff8800694b90f0
05:25:17:[40544.062002]  0000000000000000 ffff8800694b90e0 ffff880079f78800 ffff88004361fe00
05:25:17:[40544.062002] Call Trace:
05:25:17:[40544.062002]  [&amp;lt;ffffffffa0f676a8&amp;gt;] osd_trans_commit_cb+0xe8/0x490 [osd_zfs]
05:25:17:[40544.062002]  [&amp;lt;ffffffffa17d05e4&amp;gt;] dmu_tx_do_callbacks+0x44/0x70 [zfs]
05:25:17:[40544.062002]  [&amp;lt;ffffffffa1814c74&amp;gt;] txg_do_callbacks+0x14/0x30 [zfs]
05:25:17:[40544.062002]  [&amp;lt;ffffffffa06d36de&amp;gt;] taskq_thread+0x22e/0x440 [spl]
05:25:17:[40544.062002]  [&amp;lt;ffffffff810c54e0&amp;gt;] ? wake_up_state+0x20/0x20
05:25:17:[40544.062002]  [&amp;lt;ffffffffa06d34b0&amp;gt;] ? taskq_thread_spawn+0x60/0x60 [spl]
05:25:17:[40544.062002]  [&amp;lt;ffffffff810b0a4f&amp;gt;] kthread+0xcf/0xe0
05:25:17:[40544.062002]  [&amp;lt;ffffffff810b0980&amp;gt;] ? kthread_create_on_node+0x140/0x140
05:25:17:[40544.062002]  [&amp;lt;ffffffff81697798&amp;gt;] ret_from_fork+0x58/0x90
05:25:17:[40544.062002]  [&amp;lt;ffffffff810b0980&amp;gt;] ? kthread_create_on_node+0x140/0x140
05:25:17:[40544.062002] Code: 41 54 49 89 fc 53 f6 47 48 02 75 42 48 8b 0f 48 8b 41 48 48 8d 51 48 48 39 d0 48 8d 58 d8 74 2e 0f 1f 80 00 00 00 00 48 8b 43 10 &amp;lt;48&amp;gt; 85 c0 74 0d 48 8b 73 18 4c 89 e7 ff d0 49 8b 0c 24 48 8b 43 
05:25:17:[40544.062002] Kernel panic - not syncing: softlockup: hung tasks
05:25:17:[40544.062002] CPU: 1 PID: 16414 Comm: tx_commit_cb Tainted: P           OEL ------------   3.10.0-514.21.1.el7_lustre.x86_64 #1
05:25:17:[40544.062002] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007
05:25:17:[40544.062002]  ffffffff818da7c9 00000000a57bc182 ffff88007fd03e18 ffffffff8168717f
05:25:17:[40544.062002]  ffff88007fd03e98 ffffffff816805aa 0000000000000008 ffff88007fd03ea8
05:25:17:[40544.062002]  ffff88007fd03e48 00000000a57bc182 ffff88007fd03e67 0000000000000000
05:25:17:[40544.062002] Call Trace:
05:25:17:[40544.062002]  &amp;lt;IRQ&amp;gt;  [&amp;lt;ffffffff8168717f&amp;gt;] dump_stack+0x19/0x1b
05:25:17:[40544.062002]  [&amp;lt;ffffffff816805aa&amp;gt;] panic+0xe3/0x1f2
05:25:17:[40544.062002]  [&amp;lt;ffffffff8112f444&amp;gt;] watchdog_timer_fn+0x234/0x240
05:25:17:[40544.062002]  [&amp;lt;ffffffff8112f210&amp;gt;] ? watchdog+0x50/0x50
05:25:17:[40544.062002]  [&amp;lt;ffffffff810b4d72&amp;gt;] __hrtimer_run_queues+0xd2/0x260
05:25:17:[40544.062002]  [&amp;lt;ffffffff810b5310&amp;gt;] hrtimer_interrupt+0xb0/0x1e0
05:25:17:[40544.062002]  [&amp;lt;ffffffff816992dc&amp;gt;] ? call_softirq+0x1c/0x30
05:25:17:[40544.062002]  [&amp;lt;ffffffff81050ff7&amp;gt;] local_apic_timer_interrupt+0x37/0x60
05:25:17:[40544.062002]  [&amp;lt;ffffffff81699f4f&amp;gt;] smp_apic_timer_interrupt+0x3f/0x60
05:25:17:[40544.062002]  [&amp;lt;ffffffff8169849d&amp;gt;] apic_timer_interrupt+0x6d/0x80
05:25:17:[40544.062002]  &amp;lt;EOI&amp;gt;  [&amp;lt;ffffffffa08c4fe4&amp;gt;] ? dt_txn_hook_commit+0x34/0x60 [obdclass]
05:25:17:[40544.062002]  [&amp;lt;ffffffffa08c4ff2&amp;gt;] ? dt_txn_hook_commit+0x42/0x60 [obdclass]
05:25:17:[40544.062002]  [&amp;lt;ffffffffa0f676a8&amp;gt;] osd_trans_commit_cb+0xe8/0x490 [osd_zfs]
05:25:17:[    0.000000] Initializing cgroup subsys cpuset
05:25:17:[    0.000000] Initializing cgroup subsys cpu
05:25:17:[    0.000000] Initializing cgroup subsys cpuacct
05:25:17:[    0.000000] Linux version 3.10.0-514.21.1.el7_lustre.x86_64 (jenkins@trevis-310-el7
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="46839">LU-9706</key>
            <summary>conf-sanity test_53a: MDS soft lockup - CPU#0 stuck for 22s in osd_trans_commit_cb()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>zfs</label>
                    </labels>
                <created>Thu, 22 Jun 2017 22:46:52 +0000</created>
                <updated>Mon, 1 Apr 2019 14:14:31 +0000</updated>
                            <resolved>Wed, 27 Feb 2019 05:37:32 +0000</resolved>
                                    <version>Lustre 2.10.0</version>
                    <version>Lustre 2.11.0</version>
                    <version>Lustre 2.12.0</version>
                                    <fixVersion>Lustre 2.13.0</fixVersion>
                    <fixVersion>Lustre 2.10.7</fixVersion>
                    <fixVersion>Lustre 2.12.1</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>13</watches>
                                                                            <comments>
                            <comment id="212980" author="jamesanunez" created="Tue, 7 Nov 2017 14:38:48 +0000"  >&lt;p&gt;I&apos;ve seen the same hang on MDS for conf-sanity test 53b. Logs are at &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/1d538b38-c347-11e7-88ab-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/1d538b38-c347-11e7-88ab-52540065bddc&lt;/a&gt;. This is the only time 53b hung on MDS umount in the past two months.&lt;/p&gt;</comment>
                            <comment id="213951" author="yujian" created="Thu, 16 Nov 2017 19:56:55 +0000"  >&lt;p&gt;conf-sanity test 39 in ZFS test session also hit the same failure: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/63f771f2-cae6-11e7-9840-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/63f771f2-cae6-11e7-9840-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Console log on MDS:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: umount -d -f /mnt/lustre-mds1
NMI watchdog: BUG: soft lockup - CPU#1 stuck for 23s! [tx_commit_cb:27046]
Modules linked in: osd_zfs(OE) zfs(POE) obdecho(OE) ptlrpc_gss(OE) ofd(OE) ost(OE) lustre(OE) lmv(OE) mdc(OE) osc(OE) lov(OE) osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgs(OE) mgc(OE) lquota(OE) fid(OE) fld(OE) ksocklnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) zunicode(POE) zavl(POE) icp(POE) zcommon(POE) znvpair(POE) spl(OE) libcfs(OE) dm_mod rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod crc_t10dif crct10dif_generic ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core iosf_mbi crc32_pclmul ghash_clmulni_intel ppdev aesni_intel lrw gf128mul glue_helper ablk_helper cryptd joydev parport_pc pcspkr virtio_balloon parport i2c_piix4 i2c_core nfsd nfs_acl lockd auth_rpcgss grace sunrpc ip_tables ext4 mbcache jbd2 ata_generic pata_acpi virtio_blk ata_piix libata crct10dif_pclmul crct10dif_common 8139too crc32c_intel floppy virtio_pci virtio_ring serio_raw virtio 8139cp mii [last unloaded: zfs] 
CPU: 1 PID: 27046 Comm: tx_commit_cb Tainted: P           OE  ------------   3.10.0-693.5.2.el7_lustre.x86_64 #1
Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
task: ffff880067184f10 ti: ffff88007ac34000 task.ti: ffff88007ac34000
RIP: 0010:[&amp;lt;ffffffffc0beb664&amp;gt;]  [&amp;lt;ffffffffc0beb664&amp;gt;] dt_txn_hook_commit+0x34/0x60 [obdclass]
RSP: 0018:ffff88007ac37d70  EFLAGS: 00000216
RAX: 0000000000000000 RBX: ffff880067184f78 RCX: ffff8800503c8000
RDX: ffff8800503c8048 RSI: 0000000000000000 RDI: ffff880060fc1000
RBP: ffff88007ac37d80 R08: 0000000000000001 R09: 000000010076d1ab
R10: ffff88004ecb6088 R11: 7fffffffffffffff R12: ffff88004f57a3e0
R13: ffff880050461d80 R14: ffffffff810cdc8c R15: ffff88007ac37d18
FS:  0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055aba7b561d8 CR3: 00000000019f2000 CR4: 00000000000406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
 ffff88004f57a3e0 ffff880060fc1000 ffff88007ac37dc8 ffffffffc07be6e8
 ffff8800503c8000 ffff88007c0a4000 ffff88004f57a3e0 ffff88004f57a490
 0000000000000000 ffff88004f57a480 0000000000000000 ffff88007ac37df8
Call Trace:
 [&amp;lt;ffffffffc07be6e8&amp;gt;] osd_trans_commit_cb+0xe8/0x480 [osd_zfs]
 [&amp;lt;ffffffffc198fce4&amp;gt;] dmu_tx_do_callbacks+0x44/0x70 [zfs]
 [&amp;lt;ffffffffc19db364&amp;gt;] txg_do_callbacks+0x14/0x30 [zfs]
 [&amp;lt;ffffffffc0659ed6&amp;gt;] taskq_thread+0x246/0x470 [spl]
 [&amp;lt;ffffffff810c4820&amp;gt;] ? wake_up_state+0x20/0x20
 [&amp;lt;ffffffffc0659c90&amp;gt;] ? taskq_thread_spawn+0x60/0x60 [spl]
 [&amp;lt;ffffffff810b099f&amp;gt;] kthread+0xcf/0xe0
 [&amp;lt;ffffffff810bf114&amp;gt;] ? finish_task_switch+0x54/0x160
 [&amp;lt;ffffffff810b08d0&amp;gt;] ? insert_kthread_work+0x40/0x40
 [&amp;lt;ffffffff816b4fd8&amp;gt;] ret_from_fork+0x58/0x90
 [&amp;lt;ffffffff810b08d0&amp;gt;] ? insert_kthread_work+0x40/0x40
Code: 41 54 49 89 fc 53 f6 47 14 02 75 42 48 8b 0f 48 8b 41 48 48 8d 51 48 48 39 d0 48 8d 58 d8 74 2e 0f 1f 80 00 00 00 00 48 8b 43 10 &amp;lt;48&amp;gt; 85 c0 74 0d 48 8b 73 18 4c 89 e7 ff d0 49 8b 0c 24 48 8b 43
Kernel panic - not syncing: softlockup: hung tasks
CPU: 1 PID: 27046 Comm: tx_commit_cb Tainted: P           OEL ------------   3.10.0-693.5.2.el7_lustre.x86_64 #1
Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
 0000000000000000 000000006a1707aa ffff88007fd03e20 ffffffff816a3e51
 ffff88007fd03ea0 ffffffff8169dd14 0000000000000008 ffff88007fd03eb0
 ffff88007fd03e50 000000006a1707aa ffffffff8102d8af 0000000000000000
Call Trace:
 &amp;lt;IRQ&amp;gt;  [&amp;lt;ffffffff816a3e51&amp;gt;] dump_stack+0x19/0x1b
 [&amp;lt;ffffffff8169dd14&amp;gt;] panic+0xe8/0x20d
 [&amp;lt;ffffffff8102d8af&amp;gt;] ? show_regs+0x5f/0x220
 [&amp;lt;ffffffff8112f661&amp;gt;] watchdog_timer_fn+0x221/0x230
 [&amp;lt;ffffffff8112f440&amp;gt;] ? watchdog+0x40/0x40
 [&amp;lt;ffffffff810b4af4&amp;gt;] __hrtimer_run_queues+0xd4/0x260
 [&amp;lt;ffffffff810b508f&amp;gt;] hrtimer_interrupt+0xaf/0x1d0
 [&amp;lt;ffffffff81053895&amp;gt;] local_apic_timer_interrupt+0x35/0x60
 [&amp;lt;ffffffff816b777d&amp;gt;] smp_apic_timer_interrupt+0x3d/0x50
 [&amp;lt;ffffffff816b5cdd&amp;gt;] apic_timer_interrupt+0x6d/0x80
 &amp;lt;EOI&amp;gt;  [&amp;lt;ffffffffc0beb664&amp;gt;] ? dt_txn_hook_commit+0x34/0x60 [obdclass]
 [&amp;lt;ffffffffc07be6e8&amp;gt;] osd_trans_commit_cb+0xe8/0x480 [osd_zfs]
 [&amp;lt;ffffffffc198fce4&amp;gt;] dmu_tx_do_callbacks+0x44/0x70 [zfs]
 [&amp;lt;ffffffffc19db364&amp;gt;] txg_do_callbacks+0x14/0x30 [zfs]
 [&amp;lt;ffffffffc0659ed6&amp;gt;] taskq_thread+0x246/0x470 [spl]
 [&amp;lt;ffffffff810c4820&amp;gt;] ? wake_up_state+0x20/0x20
 [&amp;lt;ffffffffc0659c90&amp;gt;] ? taskq_thread_spawn+0x60/0x60 [spl]
 [&amp;lt;ffffffff810b099f&amp;gt;] kthread+0xcf/0xe0
 [&amp;lt;ffffffff810bf114&amp;gt;] ? finish_task_switch+0x54/0x160
 [&amp;lt;ffffffff810b08d0&amp;gt;] ? insert_kthread_work+0x40/0x40
 [&amp;lt;ffffffff816b4fd8&amp;gt;] ret_from_fork+0x58/0x90
 [&amp;lt;ffffffff810b08d0&amp;gt;] ? insert_kthread_work+0x40/0x40
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="215535" author="yong.fan" created="Thu, 7 Dec 2017 11:56:01 +0000"  >&lt;p&gt;+1 on master:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/c66d5546-db2f-11e7-9840-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/c66d5546-db2f-11e7-9840-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="226032" author="adilger" created="Fri, 13 Apr 2018 21:58:44 +0000"  >&lt;p&gt;+1 on master when testing sanity-lfsck:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/79d0a996-3f51-11e8-960d-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/79d0a996-3f51-11e8-960d-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="226762" author="sarah" created="Wed, 25 Apr 2018 21:30:46 +0000"  >&lt;p&gt;+1 on master tag-2.11.51 DNE zfs sanity-lfsck test_11b&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/22afde2a-477b-11e8-95c0-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/22afde2a-477b-11e8-95c0-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="229049" author="utopiabound" created="Mon, 4 Jun 2018 18:41:05 +0000"  >&lt;p&gt;Is this a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6845&quot; title=&quot;conf-sanity test_30b: kernel panic in dt_txn_hook_commit+0x30 during umount&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6845&quot;&gt;&lt;del&gt;LU-6845&lt;/del&gt;&lt;/a&gt;? Or maybe even/also &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6814&quot; title=&quot;conf-sanity test 23b hangs on unmount&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6814&quot;&gt;&lt;del&gt;LU-6814&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;</comment>
                            <comment id="233647" author="jamesanunez" created="Mon, 17 Sep 2018 22:56:49 +0000"  >&lt;p&gt;Same crash on sanity-quota test 35 at &lt;a href=&quot;https://testing.whamcloud.com/test_sets/9d3c27ca-b995-11e8-8c12-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/9d3c27ca-b995-11e8-8c12-52540065bddc&lt;/a&gt; for Lustre 2.11.55.&lt;/p&gt;</comment>
                            <comment id="234108" author="yujian" created="Fri, 28 Sep 2018 16:37:25 +0000"  >&lt;p&gt;+1 on master branch when testing conf-sanity test 90a:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/7d3b2c1e-c313-11e8-a2b9-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/7d3b2c1e-c313-11e8-a2b9-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="241532" author="bzzz" created="Thu, 7 Feb 2019 08:21:27 +0000"  >&lt;p&gt;I also saw this few times in the reports. looking at the source - it&apos;s not clear why dt_txn_callback_add() and dt_txn_callback_del() don&apos;t use any locks.&lt;/p&gt;</comment>
                            <comment id="241533" author="bzzz" created="Thu, 7 Feb 2019 09:29:30 +0000"  >&lt;p&gt;at initialisation tgt_init() is called before mdt_fs_setup(). thus dt_txn_hook_commit() can race against dt_txn_callback_add(). similarly dt_txn_callback_del() from mdt_fs_cleanup() can race as there is no barrier for in-flight transaction.&lt;br/&gt;
the trivial solution would be to use a semaphore (we can&apos;t use spinlocks there), but ZFS may call commit callbacks on few cores and that can be a performance issue.&lt;/p&gt;</comment>
                            <comment id="241534" author="bzzz" created="Thu, 7 Feb 2019 09:48:30 +0000"  >&lt;p&gt;$ findsrc dtc_txn_commit.*=&lt;br/&gt;
./lustre/target/tgt_main.c:	lut-&amp;gt;lut_txn_cb.dtc_txn_commit = NULL;&lt;br/&gt;
./lustre/mdt/mdt_recovery.c:	mdt-&amp;gt;mdt_txn_cb.dtc_txn_commit = NULL;&lt;/p&gt;

&lt;p&gt;I guess we can remove dt_txn_hook_commit()&lt;/p&gt;</comment>
                            <comment id="241574" author="adilger" created="Thu, 7 Feb 2019 23:36:16 +0000"  >&lt;p&gt;+1 on b2_10: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/af0788a8-2693-11e9-b901-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/af0788a8-2693-11e9-b901-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="241577" author="gerrit" created="Fri, 8 Feb 2019 03:35:45 +0000"  >&lt;p&gt;Alex Zhuravlev (bzzz@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34212&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34212&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9706&quot; title=&quot;conf-sanity test_53a: MDS soft lockup - CPU#0 stuck for 22s in osd_trans_commit_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9706&quot;&gt;&lt;del&gt;LU-9706&lt;/del&gt;&lt;/a&gt; dt: remove dt_txn_hook_commit()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: b46892faa8c9b1f174d767f00ba4268715b4030e&lt;/p&gt;</comment>
                            <comment id="242105" author="mdiep" created="Sat, 16 Feb 2019 16:14:12 +0000"  >&lt;p&gt;+1 &lt;a href=&quot;https://testing.whamcloud.com/test_sessions/f220c6df-ec28-4946-ad8e-c861d57e5a64&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/f220c6df-ec28-4946-ad8e-c861d57e5a64&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="242185" author="bzzz" created="Mon, 18 Feb 2019 13:04:20 +0000"  >&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/7794d909-1bc3-4be5-a75f-d01c4b79a1b5/retest&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/7794d909-1bc3-4be5-a75f-d01c4b79a1b5/retest&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="242187" author="gerrit" created="Mon, 18 Feb 2019 13:14:13 +0000"  >&lt;p&gt;Alex Zhuravlev (bzzz@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34274&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34274&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9706&quot; title=&quot;conf-sanity test_53a: MDS soft lockup - CPU#0 stuck for 22s in osd_trans_commit_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9706&quot;&gt;&lt;del&gt;LU-9706&lt;/del&gt;&lt;/a&gt; dt: remove dt_txn_hook_commit()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 2b82e3c36bde3ec0d2c1b314b48b392bdfc92518&lt;/p&gt;</comment>
                            <comment id="242348" author="pfarrell" created="Wed, 20 Feb 2019 15:25:01 +0000"  >&lt;p&gt;Also saw this with conf-sanity 5e:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/4040ad08-d8aa-4066-994d-28e0bf82036f&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/4040ad08-d8aa-4066-994d-28e0bf82036f&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="242675" author="pfarrell" created="Mon, 25 Feb 2019 16:05:34 +0000"  >&lt;p&gt;Master:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/4040ad08-d8aa-4066-994d-28e0bf82036f&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/4040ad08-d8aa-4066-994d-28e0bf82036f&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="242899" author="gerrit" created="Wed, 27 Feb 2019 02:02:59 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/34212/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34212/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9706&quot; title=&quot;conf-sanity test_53a: MDS soft lockup - CPU#0 stuck for 22s in osd_trans_commit_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9706&quot;&gt;&lt;del&gt;LU-9706&lt;/del&gt;&lt;/a&gt; dt: remove dt_txn_hook_commit()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: e763467ebe00913e8d03f855dc4b918b95099931&lt;/p&gt;</comment>
                            <comment id="242926" author="pjones" created="Wed, 27 Feb 2019 05:37:32 +0000"  >&lt;p&gt;Landed for 2.13&lt;/p&gt;</comment>
                            <comment id="243204" author="gerrit" created="Sat, 2 Mar 2019 01:30:33 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/34274/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34274/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9706&quot; title=&quot;conf-sanity test_53a: MDS soft lockup - CPU#0 stuck for 22s in osd_trans_commit_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9706&quot;&gt;&lt;del&gt;LU-9706&lt;/del&gt;&lt;/a&gt; dt: remove dt_txn_hook_commit()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 4b239e9138aa69eb996ee47b31f075c085efa1e5&lt;/p&gt;</comment>
                            <comment id="243221" author="bzzz" created="Sat, 2 Mar 2019 14:57:09 +0000"  >&lt;p&gt;Peter, ldiskfs was vulnerable as well.&lt;/p&gt;</comment>
                            <comment id="244589" author="gerrit" created="Sat, 23 Mar 2019 15:29:44 +0000"  >&lt;p&gt;Minh Diep (mdiep@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34496&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34496&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9706&quot; title=&quot;conf-sanity test_53a: MDS soft lockup - CPU#0 stuck for 22s in osd_trans_commit_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9706&quot;&gt;&lt;del&gt;LU-9706&lt;/del&gt;&lt;/a&gt; dt: remove dt_txn_hook_commit()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 3a518aa8871ce41c03c44d7daeb6b8eb6c077d97&lt;/p&gt;</comment>
                            <comment id="244979" author="gerrit" created="Mon, 1 Apr 2019 06:19:37 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/34496/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34496/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9706&quot; title=&quot;conf-sanity test_53a: MDS soft lockup - CPU#0 stuck for 22s in osd_trans_commit_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9706&quot;&gt;&lt;del&gt;LU-9706&lt;/del&gt;&lt;/a&gt; dt: remove dt_txn_hook_commit()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 6c2ecb0c72510cad32210ec327122c2b140eb6cc&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="31006">LU-6814</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="53615">LU-11528</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzfl3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>