<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:52:56 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5605] Interop 2.5.2&lt;-&gt;2.7 lustre-rsync-test test_6: soft lockup on statahead</title>
                <link>https://jira.whamcloud.com/browse/LU-5605</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for sarah &amp;lt;sarah@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/7e94d1f8-32e1-11e4-9c61-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/7e94d1f8-32e1-11e4-9c61-5254006e85c2&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_6 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;test log&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;22:57:14:Lustre: DEBUG MARKER: == lustre-rsync-test test 6: lustre_rsync large no of hard links == 22:55:14 (1409637314)
22:57:14:BUG: soft lockup - CPU#0 stuck for 67s! [ll_sa_27678:27679]
22:57:14:Modules linked in: lustre(U) obdecho(U) mgc(U) lov(U) osc(U) mdc(U) lmv(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) ksocklnd(U) lnet(U) libcfs(U) sha512_generic sha256_generic nfs fscache nfsd lockd nfs_acl auth_rpcgss sunrpc exportfs autofs4 ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core microcode virtio_balloon 8139too 8139cp mii i2c_piix4 i2c_core ext3 jbd mbcache virtio_blk virtio_pci virtio_ring virtio pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: libcfs]
22:57:14:BUG: soft lockup - CPU#1 stuck for 67s! [ptlrpcd_0:1399]
22:57:14:Modules linked in: lustre(U) obdecho(U) mgc(U) lov(U) osc(U) mdc(U) lmv(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) ksocklnd(U) lnet(U) libcfs(U) sha512_generic sha256_generic nfs fscache nfsd lockd nfs_acl auth_rpcgss sunrpc exportfs autofs4 ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core microcode virtio_balloon 8139too 8139cp mii i2c_piix4 i2c_core ext3 jbd mbcache virtio_blk virtio_pci virtio_ring virtio pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: libcfs]
22:57:14:CPU 1 
22:57:14:Modules linked in: lustre(U) obdecho(U) mgc(U) lov(U) osc(U) mdc(U) lmv(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) ksocklnd(U) lnet(U) libcfs(U) sha512_generic sha256_generic nfs fscache nfsd lockd nfs_acl auth_rpcgss sunrpc exportfs autofs4 ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core microcode virtio_balloon 8139too 8139cp mii i2c_piix4 i2c_core ext3 jbd mbcache virtio_blk virtio_pci virtio_ring virtio pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: libcfs]
22:57:14:
22:57:14:Pid: 1399, comm: ptlrpcd_0 Not tainted 2.6.32-431.17.1.el6.x86_64 #1 Red Hat KVM
22:57:14:RIP: 0010:[&amp;lt;ffffffff8152a5ee&amp;gt;]  [&amp;lt;ffffffff8152a5ee&amp;gt;] _spin_lock+0x1e/0x30
22:57:14:RSP: 0018:ffff88007c48dc20  EFLAGS: 00000206
22:57:14:RAX: 0000000000000002 RBX: ffff88007c48dc20 RCX: 5a5a5a5a5a5a5a5a
22:57:14:RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88005302f440
22:57:14:RBP: ffffffff8100bb8e R08: 5a5a5a5a5a5a5a5a R09: 5a5a5a5a5a5a5a5a
22:57:14:R10: 0000000000000000 R11: 0000000000000078 R12: 0000000000000000
22:57:14:R13: ffff88007cbe12d8 R14: 0000000000000000 R15: ffffffff00000000
22:57:14:FS:  0000000000000000(0000) GS:ffff880002300000(0000) knlGS:0000000000000000
22:57:14:CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
22:57:14:CR2: 0000000001eb81c0 CR3: 0000000001a85000 CR4: 00000000000006e0
22:57:14:DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
22:57:14:DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
22:57:14:Process ptlrpcd_0 (pid: 1399, threadinfo ffff88007c48c000, task ffff88007a29d540)
22:57:14:Stack:
22:57:14: ffff88007c48dc80 ffffffffa1083c5b 0000000000000000 ffff880049195f48
22:57:14:&amp;lt;d&amp;gt; ffff88005302f440 ffff880060ac6800 ffff880060ac6800 ffff88007adc0480
22:57:14:&amp;lt;d&amp;gt; ffff880060ac6800 ffff880049195e00 0000000000000000 ffff88007c641400
22:57:14:Call Trace:
22:57:14: [&amp;lt;ffffffffa1083c5b&amp;gt;] ? ll_statahead_interpret+0x7b/0x560 [lustre]
22:57:14: [&amp;lt;ffffffffa0efa736&amp;gt;] ? mdc_intent_getattr_async_interpret+0x1f6/0x540 [mdc]
22:57:14: [&amp;lt;ffffffffa0d3caec&amp;gt;] ? ptlrpc_check_set+0x2bc/0x1b50 [ptlrpc]
22:57:14: [&amp;lt;ffffffffa0d6804b&amp;gt;] ? ptlrpcd_check+0x53b/0x560 [ptlrpc]
22:57:14: [&amp;lt;ffffffffa0d6856b&amp;gt;] ? ptlrpcd+0x20b/0x370 [ptlrpc]
22:57:14: [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
22:57:14: [&amp;lt;ffffffffa0d68360&amp;gt;] ? ptlrpcd+0x0/0x370 [ptlrpc]
22:57:14: [&amp;lt;ffffffff8109ab56&amp;gt;] ? kthread+0x96/0xa0
22:57:14: [&amp;lt;ffffffff8100c20a&amp;gt;] ? child_rip+0xa/0x20
22:57:14: [&amp;lt;ffffffff8109aac0&amp;gt;] ? kthread+0x0/0xa0
22:57:14: [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
22:57:14:Code: 00 00 00 01 74 05 e8 22 40 d6 ff c9 c3 55 48 89 e5 0f 1f 44 00 00 b8 00 00 01 00 f0 0f c1 07 0f b7 d0 c1 e8 10 39 c2 74 0e f3 90 &amp;lt;0f&amp;gt; 1f 44 00 00 83 3f 00 75 f4 eb df c9 c3 0f 1f 40 00 55 48 89 
22:57:14:Call Trace:
22:57:14: [&amp;lt;ffffffffa1083c5b&amp;gt;] ? ll_statahead_interpret+0x7b/0x560 [lustre]
22:57:14: [&amp;lt;ffffffffa0efa736&amp;gt;] ? mdc_intent_getattr_async_interpret+0x1f6/0x540 [mdc]
22:57:14: [&amp;lt;ffffffffa0d3caec&amp;gt;] ? ptlrpc_check_set+0x2bc/0x1b50 [ptlrpc]
22:57:14: [&amp;lt;ffffffffa0d6804b&amp;gt;] ? ptlrpcd_check+0x53b/0x560 [ptlrpc]
22:57:14: [&amp;lt;ffffffffa0d6856b&amp;gt;] ? ptlrpcd+0x20b/0x370 [ptlrpc]
22:57:14: [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
22:57:14: [&amp;lt;ffffffffa0d68360&amp;gt;] ? ptlrpcd+0x0/0x370 [ptlrpc]
22:57:14: [&amp;lt;ffffffff8109ab56&amp;gt;] ? kthread+0x96/0xa0
22:57:14: [&amp;lt;ffffffff8100c20a&amp;gt;] ? child_rip+0xa/0x20
22:57:14: [&amp;lt;ffffffff8109aac0&amp;gt;] ? kthread+0x0/0xa0
22:57:14: [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
22:57:14:CPU 0 
22:58:35:Modules linked in: lustre(U) obdecho(U) mgc(U) lov(U) osc(U) mdc(U) lmv(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) ksocklnd(U) lnet(U) libcfs(U) sha512_generic sha256_generic nfs fscache nfsd lockd nfs_acl auth_rpcgss sunrpc exportfs autofs4 ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core microcode virtio_balloon 8139too 8139cp mii i2c_piix4 i2c_core ext3 jbd mbcache virtio_blk virtio_pci virtio_ring virtio pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: libcfs]
22:58:35:
22:58:35:Pid: 27679, comm: ll_sa_27678 Not tainted 2.6.32-431.17.1.el6.x86_64 #1 Red Hat KVM
22:58:35:RIP: 0010:[&amp;lt;ffffffff8152a5ee&amp;gt;]  [&amp;lt;ffffffff8152a5ee&amp;gt;] _spin_lock+0x1e/0x30
22:58:35:RSP: 0018:ffff880068113d50  EFLAGS: 00000206
22:58:35:RAX: 0000000000000001 RBX: ffff880068113d50 RCX: 0000000000000000
22:58:35:RDX: 0000000000000000 RSI: ffff8800531765c0 RDI: ffff88005302f440
22:58:35:RBP: ffffffff8100bb8e R08: 0000000000000000 R09: 00000000fffffffe
22:58:35:R10: 0000000000000000 R11: 0000000000000001 R12: ffff880068113d30
22:58:35:R13: ffff8800228a4400 R14: 0000000000001000 R15: 0000000000000000
22:58:35:FS:  0000000000000000(0000) GS:ffff880002200000(0000) knlGS:0000000000000000
22:58:35:CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
22:58:35:CR2: 00000000019ea0f8 CR3: 00000000253dd000 CR4: 00000000000006f0
22:58:35:DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
22:58:35:DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
22:58:35:Process ll_sa_27678 (pid: 27679, threadinfo ffff880068112000, task ffff88002879aaa0)
22:58:35:Stack:
22:58:35: ffff880068113dc0 ffffffffa1084cc0 ffff88005302f178 ffff880033595670
22:58:35:&amp;lt;d&amp;gt; 0000000000000000 ffff88005302f080 ffff88005dc395c0 ffff88002513f600
22:58:35:&amp;lt;d&amp;gt; ffff880068113dc0 ffff880028433000 ffff880028433170 ffff88005302f400
22:58:35:Call Trace:
22:58:35: [&amp;lt;ffffffffa1084cc0&amp;gt;] ? ll_post_statahead+0x50/0xa20 [lustre]
22:58:35: [&amp;lt;ffffffffa1088ce8&amp;gt;] ? ll_statahead_thread+0x228/0xfb0 [lustre]
22:58:35: [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
22:58:35: [&amp;lt;ffffffffa1088ac0&amp;gt;] ? ll_statahead_thread+0x0/0xfb0 [lustre]
22:58:35: [&amp;lt;ffffffff8109ab56&amp;gt;] ? kthread+0x96/0xa0
22:58:35: [&amp;lt;ffffffff8100c20a&amp;gt;] ? child_rip+0xa/0x20
22:58:35: [&amp;lt;ffffffff8109aac0&amp;gt;] ? kthread+0x0/0xa0
22:58:35: [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
22:58:35:Code: 00 00 00 01 74 05 e8 22 40 d6 ff c9 c3 55 48 89 e5 0f 1f 44 00 00 b8 00 00 01 00 f0 0f c1 07 0f b7 d0 c1 e8 10 39 c2 74 0e f3 90 &amp;lt;0f&amp;gt; 1f 44 00 00 83 3f 00 75 f4 eb df c9 c3 0f 1f 40 00 55 48 89 
22:58:35:Call Trace:
22:58:35: [&amp;lt;ffffffffa1084cc0&amp;gt;] ? ll_post_statahead+0x50/0xa20 [lustre]
22:58:35: [&amp;lt;ffffffffa1088ce8&amp;gt;] ? ll_statahead_thread+0x228/0xfb0 [lustre]
22:58:35: [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
22:58:35: [&amp;lt;ffffffffa1088ac0&amp;gt;] ? ll_statahead_thread+0x0/0xfb0 [lustre]
22:58:35: [&amp;lt;ffffffff8109ab56&amp;gt;] ? kthread+0x96/0xa0
22:58:35: [&amp;lt;ffffffff8100c20a&amp;gt;] ? child_rip+0xa/0x20
22:58:35: [&amp;lt;ffffffff8109aac0&amp;gt;] ? kthread+0x0/0xa0
22:58:35: [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Info required for matching: lustre-rsync-test 6&lt;/p&gt;</description>
                <environment>server: lustre-master build # 2639&lt;br/&gt;
client: 2.5.2</environment>
        <key id="26477">LU-5605</key>
            <summary>Interop 2.5.2&lt;-&gt;2.7 lustre-rsync-test test_6: soft lockup on statahead</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Wed, 10 Sep 2014 18:12:35 +0000</created>
                <updated>Sat, 31 Jan 2015 20:22:03 +0000</updated>
                            <resolved>Sat, 31 Jan 2015 20:21:44 +0000</resolved>
                                    <version>Lustre 2.7.0</version>
                    <version>Lustre 2.5.4</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="94685" author="yujian" created="Tue, 23 Sep 2014 03:00:34 +0000"  >&lt;p&gt;While testing patch &lt;a href=&quot;http://review.whamcloud.com/11006&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11006&lt;/a&gt; on Lustre b2_5 branch, the same failure occurred:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/61990438-416a-11e4-a787-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/61990438-416a-11e4-a787-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="95046" author="bogl" created="Fri, 26 Sep 2014 14:33:32 +0000"  >&lt;p&gt;another instance on master:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/37d77be2-4553-11e4-92e5-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/37d77be2-4553-11e4-92e5-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;not seen with interop, both client and server were master&lt;/p&gt;</comment>
                            <comment id="95815" author="yujian" created="Tue, 7 Oct 2014 16:25:54 +0000"  >&lt;p&gt;One more instance on master branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/72654654-4deb-11e4-8fdd-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/72654654-4deb-11e4-8fdd-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="96211" author="jhammond" created="Mon, 13 Oct 2014 14:21:04 +0000"  >&lt;p&gt;There are now some crash dumps for this on shadow:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[jlhammon@shadow-1 ~]$ find /export/scratch/dumps -name &apos;vmcore-dmesg.txt&apos; -exec grep softlockup {} \; -print
&amp;lt;0&amp;gt;Kernel panic - not syncing: softlockup: hung tasks
/export/scratch/dumps/shadow-22vm5.shadow.whamcloud.com/10.1.5.13-2014-10-11-16:59:41/vmcore-dmesg.txt
&amp;lt;0&amp;gt;Kernel panic - not syncing: softlockup: hung tasks
/export/scratch/dumps/shadow-11vm5.shadow.whamcloud.com/10.1.4.124-2014-10-11-12:41:27/vmcore-dmesg.txt
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&amp;lt;4&amp;gt;Lustre: DEBUG MARKER: == lustre-rsync-test test 6: lustre_rsync large no of hard links
== 16:57:53 (1413046673)
&amp;lt;0&amp;gt;BUG: soft lockup - CPU#0 stuck for 68s! [ptlrpcd_0:8469]
&amp;lt;4&amp;gt;Modules linked in: lustre(U) obdecho(U) mgc(U) lov(U) osc(U) mdc(U) lmv(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) ksocklnd(U) lnet(U) libcfs(U) ext2 sha512_generic sha256_generic nfsd exportfs nfs lockd fscache auth_rpcgss nfs_\
acl sunrpc ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core microcode virtio_balloon 8139too 8139cp mii i2c_piix4 i2c_core ext3 jbd mbcache virtio_blk virtio_pci virtio_ring virtio pata_a\
cpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: libcfs]
&amp;lt;4&amp;gt;CPU 0
&amp;lt;4&amp;gt;Modules linked in: lustre(U) obdecho(U) mgc(U) lov(U) osc(U) mdc(U) lmv(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) ksocklnd(U) lnet(U) libcfs(U) ext2 sha512_generic
&amp;lt;0&amp;gt;BUG: soft lockup - CPU#1 stuck for 68s! [ll_sa_18280:18281]
&amp;lt;4&amp;gt;Modules linked in: lustre(U) obdecho(U) mgc(U) lov(U) osc(U) mdc(U) lmv(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) ksocklnd(U) lnet(U) libcfs(U) ext2 sha512_generic sha256_generic nfsd exportfs nfs lockd fscache auth_rpcgss nfs_\
acl sunrpc ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core microcode virtio_balloon 8139too 8139cp mii i2c_piix4 i2c_core ext3 jbd mbcache virtio_blk virtio_pci virtio_ring virtio pata_a\
cpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: libcfs]
&amp;lt;4&amp;gt;CPU 1
&amp;lt;4&amp;gt;Modules linked in: lustre(U) obdecho(U) mgc(U) lov(U) osc(U) mdc(U) lmv(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) ksocklnd(U) lnet(U) libcfs(U) ext2 sha512_generic sha256_generic nfsd exportfs nfs lockd fscache auth_rpcgss nfs_\
acl sunrpc ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core microcode virtio_balloon 8139too 8139cp mii i2c_piix4 i2c_core ext3 jbd mbcache virtio_blk virtio_pci virtio_ring virtio pata_a\
cpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: libcfs]
&amp;lt;4&amp;gt;
&amp;lt;4&amp;gt;Pid: 18281, comm: ll_sa_18280 Not tainted 2.6.32-431.29.2.el6.x86_64 #1 Red Hat KVM
&amp;lt;4&amp;gt;RIP: 0010:[&amp;lt;ffffffff8152b84e&amp;gt;]  [&amp;lt;ffffffff8152b84e&amp;gt;] _spin_lock+0x1e/0x30
&amp;lt;4&amp;gt;RSP: 0018:ffff880071027d50  EFLAGS: 00000206
&amp;lt;4&amp;gt;RAX: 0000000000000001 RBX: ffff880071027d50 RCX: 0000000000000000
&amp;lt;4&amp;gt;RDX: 0000000000000000 RSI: ffff88006f314140 RDI: ffff8800699cb440
&amp;lt;4&amp;gt;RBP: ffffffff8100bb8e R08: 0000000000000000 R09: 0000000000000000
&amp;lt;4&amp;gt;R10: 0000000000000800 R11: 0000000000000400 R12: ffff880071027d30
&amp;lt;4&amp;gt;R13: ffff880070618000 R14: 0000000000001000 R15: 0000000000000000
&amp;lt;4&amp;gt;FS:  0000000000000000(0000) GS:ffff880002300000(0000) knlGS:0000000000000000
&amp;lt;4&amp;gt;CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
&amp;lt;4&amp;gt;CR2: 000000000091362c CR3: 000000006e5d9000 CR4: 00000000000006e0
&amp;lt;4&amp;gt;DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
&amp;lt;4&amp;gt;DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
&amp;lt;4&amp;gt;Process ll_sa_18280 (pid: 18281, threadinfo ffff880071026000, task ffff88006e824040)
&amp;lt;4&amp;gt;Stack:
&amp;lt;4&amp;gt; ffff880071027dc0 ffffffffa10840b0 ffff8800699cb178 ffff880060b7b630
&amp;lt;4&amp;gt;&amp;lt;d&amp;gt; 0000000000000000 ffff8800699cb080 ffff88007a9a1280 ffff88006e44e800
&amp;lt;4&amp;gt;&amp;lt;d&amp;gt; ffff880071027dc0 ffff88006b166800 ffff88006b166970 ffff8800699cb400
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa10840b0&amp;gt;] ? ll_post_statahead+0x50/0xa20 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa10880d8&amp;gt;] ? ll_statahead_thread+0x228/0xfb0 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa1087eb0&amp;gt;] ? ll_statahead_thread+0x0/0xfb0 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109abf6&amp;gt;] ? kthread+0x96/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c20a&amp;gt;] ? child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;4&amp;gt;Code: 00 00 00 01 74 05 e8 92 3a d6 ff c9 c3 55 48 89 e5 0f 1f 44 00 00 b8 00 00 01 00 f0 0f c1 07 0f b7 d0 c1 e8 10 39 c2 74 0e f3 90 &amp;lt;0f&amp;gt; 1f 44 00 00 83 3f 00 75 f4 eb df c9 c3 0f 1f 40 00 55 48 89
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa10840b0&amp;gt;] ? ll_post_statahead+0x50/0xa20 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa10880d8&amp;gt;] ? ll_statahead_thread+0x228/0xfb0 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa1087eb0&amp;gt;] ? ll_statahead_thread+0x0/0xfb0 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109abf6&amp;gt;] ? kthread+0x96/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c20a&amp;gt;] ? child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;0&amp;gt;Kernel panic - not syncing: softlockup: hung tasks
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="97414" author="jhammond" created="Fri, 24 Oct 2014 16:17:48 +0000"  >
&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_logs/89fdd20c-5b58-11e4-9243-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_logs/89fdd20c-5b58-11e4-9243-5254006e85c2&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_nodes/301f0368-5b4f-11e4-9243-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_nodes/301f0368-5b4f-11e4-9243-5254006e85c2&lt;/a&gt;&lt;br/&gt;
export/scratch/dumps/shadow-19vm10.shadow.whamcloud.com/10.1.4.233-2014-10-23-20:48:50/vmcore-dmesg.txt&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: == lustre-rsync-test test 6: lustre_rsync large no of hard links == 20:47:00 (14140972
20)
BUG: soft lockup - CPU#0 stuck for 67s! [ptlrpcd_1:2595]
...
CPU 0 
BUG: soft lockup - CPU#1 stuck for 67s! [ll_sa_13692:13693]
...
CPU 1 
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I don&apos;t know why it says procfs_read_helper below but it&apos;s really ll_statahead_interpret().&lt;/p&gt;

&lt;p&gt;ptlrpcd_0 is in ll_statahead_interpret() holding lli_sa_lock and calling ll_intent_drop_lock().&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; bt 2594
PID: 2594   TASK: ffff88007ad7f500  CPU: 1   COMMAND: &quot;ptlrpcd_0&quot;
 #0 [ffff880043be78c0] schedule at ffffffff81528e00
 #1 [ffff880043be7988] __cond_resched at ffffffff810695fa
 #2 [ffff880043be79a8] _cond_resched at ffffffff81529840
 #3 [ffff880043be79b8] kmem_cache_alloc_trace at ffffffff8116f445
 #4 [ffff880043be7a08] ldlm_bl_to_thread at ffffffffa0e134f8 [ptlrpc]
 #5 [ffff880043be7b08] ldlm_bl_to_thread_lock at ffffffffa0e13a29 [ptlrpc]
 #6 [ffff880043be7b18] ldlm_lock_decref_internal at ffffffffa0dfefc5 [ptlrpc]
 #7 [ffff880043be7b78] ldlm_lock_decref at ffffffffa0e00079 [ptlrpc]
 #8 [ffff880043be7ba8] ll_intent_drop_lock at ffffffffa1157bd1 [lustre]
 #9 [ffff880043be7bd8] procfs_read_helper at ffffffffa11bc2ae [lustre]
#10 [ffff880043be7c38] mdc_intent_getattr_async_interpret at ffffffffa103fa5a [mdc]
#11 [ffff880043be7cb8] ptlrpc_check_set at ffffffffa0e28a63 [ptlrpc]
#12 [ffff880043be7d78] ptlrpcd_check at ffffffffa0e55153 [ptlrpc]
#13 [ffff880043be7dd8] ptlrpcd at ffffffffa0e5579b [ptlrpc]
#14 [ffff880043be7ee8] kthread at ffffffff8109abf6
#15 [ffff880043be7f48] kernel_thread at ffffffff8100c20a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;This is the problem since we have a non atomic kmalloc() in ldlm_bl_to_thread():&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; ldlm_bl_to_thread(struct ldlm_namespace *ns,
                             struct ldlm_lock_desc *ld,
                             struct ldlm_lock *lock,
                             struct list_head *cancels, &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; count,
                             ldlm_cancel_flags_t cancel_flags)
{
        ENTRY;

        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (cancels &amp;amp;&amp;amp; count == 0)
                RETURN(0);

        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (cancel_flags &amp;amp; LCF_ASYNC) {
                struct ldlm_bl_work_item *blwi;

                OBD_ALLOC(blwi, sizeof(*blwi));
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (blwi == NULL)
                        RETURN(-ENOMEM);
                init_blwi(blwi, ns, ld, cancels, count, lock, cancel_flags);

                RETURN(__ldlm_bl_to_thread(blwi, cancel_flags));
       }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;ptlrpcd_1 is in ll_statahead_interpret() trying to take lli_sa_lock:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; bt 2595
PID: 2595   TASK: ffff88007ad7eaa0  CPU: 0   COMMAND: &quot;ptlrpcd_1&quot;
 #0 [ffff880002207e90] crash_nmi_callback at ffffffff8102fee6
 #1 [ffff880002207ea0] notifier_call_chain at ffffffff8152ea55
 #2 [ffff880002207ee0] atomic_notifier_call_chain at ffffffff8152eaba
 #3 [ffff880002207ef0] notify_die at ffffffff810a11de
 #4 [ffff880002207f20] do_nmi at ffffffff8152c71b
 #5 [ffff880002207f50] nmi at ffffffff8152bfe0
    [exception RIP: native_read_tsc]
    RIP: ffffffff810156b0  RSP: ffff880002203b00  RFLAGS: 00000046
    RAX: 00000000673d3da9  RBX: 0000000000000000  RCX: 00000000673d3da9
    RDX: 00000000000006cc  RSI: 0000000000291984  RDI: 0000000000000a86
    RBP: ffff880002203b28   R8: ffffffff81c068c0   R9: 0000000000000000
    R10: 696c2073656c7564  R11: 3a6e692064656b6e  R12: 00000000673d36dd
    R13: 0000000000000000  R14: 0000000000000a86  R15: ffffffff81364eb0
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
--- &amp;lt;NMI exception stack&amp;gt; ---
 #6 [ffff880002203b00] native_read_tsc at ffffffff810156b0
 #7 [ffff880002203b00] delay_tsc at ffffffff8128e2ba
 #8 [ffff880002203b30] __const_udelay at ffffffff8128e266
 #9 [ffff880002203b40] wait_for_xmitr at ffffffff81364e32
#10 [ffff880002203b70] serial8250_console_putchar at ffffffff81364ed6
#11 [ffff880002203b90] uart_console_write at ffffffff8136086e
#12 [ffff880002203bd0] serial8250_console_write at ffffffff8136523d
#13 [ffff880002203c20] __call_console_drivers at ffffffff81071cd5
#14 [ffff880002203c50] _call_console_drivers at ffffffff81071d3a
#15 [ffff880002203c70] release_console_sem at ffffffff81072388
#16 [ffff880002203cb0] vprintk at ffffffff81072a88
#17 [ffff880002203d50] printk at ffffffff81528845
#18 [ffff880002203db0] print_modules at ffffffff810b77b7
#19 [ffff880002203de0] show_registers at ffffffff8100fd46
#20 [ffff880002203e50] show_regs at ffffffff81009c16
#21 [ffff880002203e70] watchdog_timer_fn at ffffffff810e6392
#22 [ffff880002203ea0] __run_hrtimer at ffffffff8109f6be
#23 [ffff880002203ef0] hrtimer_interrupt at ffffffff8109fa26
#24 [ffff880002203f70] local_apic_timer_interrupt at ffffffff81031f1d
#25 [ffff880002203f90] smp_apic_timer_interrupt at ffffffff815325e5
#26 [ffff880002203fb0] apic_timer_interrupt at ffffffff8100bb93
--- &amp;lt;IRQ stack&amp;gt; ---
#27 [ffff8800698f1b28] apic_timer_interrupt at ffffffff8100bb93
    [exception RIP: _spin_lock+30]
    RIP: ffffffff8152b84e  RSP: ffff8800698f1bd0  RFLAGS: 00000206
    RAX: 0000000000000001  RBX: ffff8800698f1bd0  RCX: 5a5a5a5a5a5a5a5a
    RDX: 0000000000000000  RSI: ffff88003dd9bc00  RDI: ffff88007b4aa478
    RBP: ffffffff8100bb8e   R8: 5a5a5a5a5a5a5a5a   R9: 00000000fffffffe
    R10: 0000000000000000  R11: 00000000000000be  R12: 0000000000000006
    R13: ffff88006b5ed378  R14: ffff8800698f1b60  R15: ffff88007b9fea00
    ORIG_RAX: ffffffffffffff10  CS: 0010  SS: 0018
#28 [ffff8800698f1bd8] procfs_read_helper at ffffffffa11bc089 [lustre]
#29 [ffff8800698f1c38] mdc_intent_getattr_async_interpret at ffffffffa103fa5a [mdc]
#30 [ffff8800698f1cb8] ptlrpc_check_set at ffffffffa0e28a63 [ptlrpc]
#31 [ffff8800698f1d78] ptlrpcd_check at ffffffffa0e55153 [ptlrpc]
#32 [ffff8800698f1dd8] ptlrpcd at ffffffffa0e5579b [ptlrpc]
#33 [ffff8800698f1ee8] kthread at ffffffff8109abf6
#34 [ffff8800698f1f48] kernel_thread at ffffffff8100c20a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;ll_sa_13692 is in ll_post_statahead() trying to take lli_sa_lock:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; bt
PID: 13693  TASK: ffff88007cfb9540  CPU: 1   COMMAND: &quot;ll_sa_13692&quot;
 #0 [ffff880002303cc0] machine_kexec at ffffffff81038f3b
 #1 [ffff880002303d20] crash_kexec at ffffffff810c5af2
 #2 [ffff880002303df0] panic at ffffffff81528743
 #3 [ffff880002303e70] watchdog_timer_fn at ffffffff810e63ca
 #4 [ffff880002303ea0] __run_hrtimer at ffffffff8109f6be
 #5 [ffff880002303ef0] hrtimer_interrupt at ffffffff8109fa26
 #6 [ffff880002303f70] local_apic_timer_interrupt at ffffffff81031f1d
 #7 [ffff880002303f90] smp_apic_timer_interrupt at ffffffff815325e5
 #8 [ffff880002303fb0] apic_timer_interrupt at ffffffff8100bb93
--- &amp;lt;IRQ stack&amp;gt; ---
 #9 [ffff8800737c1c68] apic_timer_interrupt at ffffffff8100bb93
    [exception RIP: _spin_lock+30]
    RIP: ffffffff8152b84e  RSP: ffff8800737c1d10  RFLAGS: 00000206
    RAX: 0000000000000002  RBX: ffff8800737c1d10  RCX: 0000000000000008
    RDX: 0000000000000000  RSI: 0000000000000005  RDI: ffff88007b4aa478
    RBP: ffffffff8100bb8e   R8: 0000000000000000   R9: 00000000fffffffe
    R10: 0000000000000000  R11: 0000000000000001  R12: 0000000000000000
    R13: 0000000000000000  R14: 0000000000000003  R15: ffff88003dcc55a8
    ORIG_RAX: ffffffffffffff10  CS: 0010  SS: 0018
#10 [ffff8800737c1d18] ll_post_statahead at ffffffffa11bceed [lustre]
#11 [ffff8800737c1dc8] ta.73028 at ffffffffa11c158b [lustre]
#12 [ffff8800737c1ee8] kthread at ffffffff8109abf6
#13 [ffff8800737c1f48] kernel_thread at ffffffff8100c20a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="97506" author="yujian" created="Sat, 25 Oct 2014 18:00:42 +0000"  >&lt;p&gt;One more instance on master branch: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/8883d38c-5b57-11e4-9243-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/8883d38c-5b57-11e4-9243-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="98174" author="jhammond" created="Mon, 3 Nov 2014 14:24:34 +0000"  >&lt;p&gt;This is likely fixed by the landing of &lt;a href=&quot;http://review.whamcloud.com/9665&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9665&lt;/a&gt; from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2272&quot; title=&quot;BUG: spinlock cpu recursion on CPU#2, ll_sa_30972/30992&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2272&quot;&gt;&lt;del&gt;LU-2272&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="105301" author="yujian" created="Sat, 31 Jan 2015 20:21:23 +0000"  >&lt;blockquote&gt;&lt;p&gt;This is likely fixed by the landing of &lt;a href=&quot;http://review.whamcloud.com/9665&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9665&lt;/a&gt; from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2272&quot; title=&quot;BUG: spinlock cpu recursion on CPU#2, ll_sa_30972/30992&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2272&quot;&gt;&lt;del&gt;LU-2272&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;The patch needs to be back-ported to Lustre b2_5 and b_ieel2_0 branches. The work will be tracked in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4410&quot; title=&quot;sanityn test 40a: BUG: soft lockup - CPU#0 stuck for 67s! [ptlrpcd_0:2892]&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4410&quot;&gt;&lt;del&gt;LU-4410&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="22553">LU-4410</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwvxz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15678</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>