<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:35:58 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10537] softlockup in ldlm_prepare_lru_list()</title>
                <link>https://jira.whamcloud.com/browse/LU-10537</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This was while running  parallel-scale test mdtestssf on spirit on 2.10.3-RC1.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Jan 11 00:32:07 spirit-14 kernel: Lustre: DEBUG MARKER: == parallel-scale test mdtestssf: mdtestssf ================
========================================== 00:32:06 (1515630726)
... unrelated messages ...
Jan 11 00:39:55 spirit-14 kernel: perf: interrupt took too long (2505 &amp;gt; 2500), lowering kernel.perf_event_max_sample
_rate to 79000
Jan 11 00:42:44 spirit-14 kernel: NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [ldlm_poold:1918]
Jan 11 00:42:44 spirit-14 kernel: Modules linked in: osc(OE) mgc(OE) lustre(OE) lmv(OE) fld(OE) mdc(OE) fid(OE) lov(
OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache rpcrdma 
ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp sc
si_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx4_ib ib_core sb_edac edac_core intel_powerc
lamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul 
glue_helper ablk_helper cryptd joydev iTCO_wdt iTCO_vendor_support pcspkr sg ipmi_si shpchp ipmi_devintf wmi ipmi_ms
ghandler mei_me mei ioatdma lpc_ich i2c_i801 nfsd nfs_acl lockd grace auth_rpcgss sunrpc ip_tables ext4 mbcache jbd2
 sd_mod crc_t10dif
Jan 11 00:42:44 spirit-14 kernel: mlx4_en crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt
 fb_sys_fops ttm igb isci crct10dif_pclmul crct10dif_common libsas ahci ptp drm mlx4_core crc32c_intel libahci scsi_
transport_sas pps_core libata dca i2c_algo_bit i2c_core devlink
Jan 11 00:42:44 spirit-14 kernel: CPU: 2 PID: 1918 Comm: ldlm_poold Tainted: G           OE  ------------   3.10.0-6
93.11.6.el7.x86_64 #1
Jan 11 00:42:44 spirit-14 kernel: Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.01.0002.0822
20131453 08/22/2013
Jan 11 00:42:44 spirit-14 kernel: task: ffff881016b26eb0 ti: ffff881013300000 task.ti: ffff881013300000
Jan 11 00:42:44 spirit-14 kernel: RIP: 0010:[&amp;lt;ffffffff810fc036&amp;gt;]  [&amp;lt;ffffffff810fc036&amp;gt;] native_queued_spin_lock_slowp
ath+0x116/0x1e0
Jan 11 00:42:44 spirit-14 kernel: RSP: 0018:ffff881013303c90  EFLAGS: 00000246
Jan 11 00:42:44 spirit-14 kernel: RAX: 0000000000000000 RBX: 0000000000110000 RCX: 0000000000110000
Jan 11 00:42:44 spirit-14 kernel: RDX: ffff88101da195c0 RSI: 0000000000a10001 RDI: ffff8808113f6818
Jan 11 00:42:44 spirit-14 kernel: RBP: ffff881013303c90 R08: ffff88081e0995c0 R09: 0000000000000000
Jan 11 00:42:44 spirit-14 kernel: R10: 000007c0f1a77165 R11: 0000000000000400 R12: ffff88081e0995c0
Jan 11 00:42:44 spirit-14 kernel: R13: ffff88101d9195c0 R14: 000007c0f1a77165 R15: 0000000000000400
Jan 11 00:42:44 spirit-14 kernel: FS:  0000000000000000(0000) GS:ffff88081e080000(0000) knlGS:0000000000000000
Jan 11 00:42:44 spirit-14 kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Jan 11 00:42:44 spirit-14 kernel: CR2: 00007f51e2a62e88 CR3: 00000000019fa000 CR4: 00000000001607e0
Jan 11 00:42:44 spirit-14 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
Jan 11 00:42:44 spirit-14 kernel: DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Jan 11 00:42:44 spirit-14 kernel: Call Trace:
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffff816a070f&amp;gt;] queued_spin_lock_slowpath+0xb/0xf
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffff816ade20&amp;gt;] _raw_spin_lock+0x20/0x30
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffffc0bc1dd5&amp;gt;] ldlm_prepare_lru_list+0x325/0x4e0 [ptlrpc]
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffffc0bbfca0&amp;gt;] ? ldlm_iter_helper+0x20/0x20 [ptlrpc]
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffffc0bc6f01&amp;gt;] ldlm_cancel_lru+0x61/0x170 [ptlrpc]
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffffc0bd2191&amp;gt;] ldlm_cli_pool_recalc+0x231/0x240 [ptlrpc]
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffffc0bd27e9&amp;gt;] ldlm_pool_recalc+0x109/0x1d0 [ptlrpc]
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffffc0bd40a4&amp;gt;] ldlm_pools_recalc+0x224/0x3d0 [ptlrpc]
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffffc0bd42e5&amp;gt;] ldlm_pools_thread_main+0x95/0x330 [ptlrpc]
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffff810c6440&amp;gt;] ? wake_up_state+0x20/0x20
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffffc0bd4250&amp;gt;] ? ldlm_pools_recalc+0x3d0/0x3d0 [ptlrpc]
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffff810b252f&amp;gt;] kthread+0xcf/0xe0
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffff810b2460&amp;gt;] ? insert_kthread_work+0x40/0x40
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffff816b8798&amp;gt;] ret_from_fork+0x58/0x90
Jan 11 00:42:44 spirit-14 kernel: [&amp;lt;ffffffff810b2460&amp;gt;] ? insert_kthread_work+0x40/0x40
Jan 11 00:42:44 spirit-14 kernel: Code: 0d 48 98 83 e2 30 48 81 c2 c0 95 01 00 48 03 14 c5 20 8e b1 81 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 &amp;lt;85&amp;gt; c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 
Jan 11 00:42:44 spirit-14 kernel: sched: RT throttling activated
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Later on a mdtestssf run:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Jan 11 02:41:03 spirit-14 kernel: NMI watchdog: BUG: soft lockup - CPU#23 stuck for 23s! [ldlm_poold:1918]
Jan 11 02:41:03 spirit-14 kernel: Modules linked in: osc(OE) mgc(OE) lustre(OE) lmv(OE) fld(OE) mdc(OE) fid(OE) lov(
OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache rpcrdma 
ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp sc
si_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx4_ib ib_core sb_edac edac_core intel_powerc
lamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul 
glue_helper ablk_helper cryptd joydev iTCO_wdt iTCO_vendor_support pcspkr sg ipmi_si shpchp ipmi_devintf wmi ipmi_ms
ghandler mei_me mei ioatdma lpc_ich i2c_i801 nfsd nfs_acl lockd grace auth_rpcgss sunrpc ip_tables ext4 mbcache jbd2
 sd_mod crc_t10dif
Jan 11 02:41:03 spirit-14 kernel: mlx4_en crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt
 fb_sys_fops ttm igb isci crct10dif_pclmul crct10dif_common libsas ahci ptp drm mlx4_core crc32c_intel libahci scsi_
transport_sas pps_core libata dca i2c_algo_bit i2c_core devlink
Jan 11 02:41:03 spirit-14 kernel: CPU: 23 PID: 1918 Comm: ldlm_poold Tainted: G           OEL ------------   3.10.0-
693.11.6.el7.x86_64 #1
Jan 11 02:41:03 spirit-14 kernel: Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.01.0002.0822
20131453 08/22/2013
Jan 11 02:41:03 spirit-14 kernel: task: ffff881016b26eb0 ti: ffff881013300000 task.ti: ffff881013300000
Jan 11 02:41:03 spirit-14 kernel: RIP: 0010:[&amp;lt;ffffffff810fc036&amp;gt;]  [&amp;lt;ffffffff810fc036&amp;gt;] native_queued_spin_lock_slowp
ath+0x116/0x1e0
Jan 11 02:41:03 spirit-14 kernel: RSP: 0018:ffff881013303c90  EFLAGS: 00000246
Jan 11 02:41:03 spirit-14 kernel: RAX: 0000000000000000 RBX: 0000000000b90000 RCX: 0000000000b90000
Jan 11 02:41:03 spirit-14 kernel: RDX: ffff88081e1995c0 RSI: 0000000000310001 RDI: ffff88081ae8a818
Jan 11 02:41:03 spirit-14 kernel: RBP: ffff881013303c90 R08: ffff88101dad95c0 R09: 0000000000000000
Jan 11 02:41:03 spirit-14 kernel: R10: 00000e0a543fd4e1 R11: 0000000000000400 R12: ffff88101dad95c0
Jan 11 02:41:03 spirit-14 kernel: R13: ffff88101d8195c0 R14: 00000e0a543fd4e1 R15: 0000000000000400
Jan 11 02:41:03 spirit-14 kernel: FS:  0000000000000000(0000) GS:ffff88101dac0000(0000) knlGS:0000000000000000
Jan 11 02:41:03 spirit-14 kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Jan 11 02:41:03 spirit-14 kernel: CR2: 0000000000616dc0 CR3: 00000000019fa000 CR4: 00000000001607e0
Jan 11 02:41:03 spirit-14 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
Jan 11 02:41:03 spirit-14 kernel: DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Jan 11 02:41:03 spirit-14 kernel: Call Trace:
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffff816a070f&amp;gt;] queued_spin_lock_slowpath+0xb/0xf
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffff816ade20&amp;gt;] _raw_spin_lock+0x20/0x30
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffffc0bc1dd5&amp;gt;] ldlm_prepare_lru_list+0x325/0x4e0 [ptlrpc]
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffffc0bbfca0&amp;gt;] ? ldlm_iter_helper+0x20/0x20 [ptlrpc]
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffffc0bc6f01&amp;gt;] ldlm_cancel_lru+0x61/0x170 [ptlrpc]
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffffc0bd2191&amp;gt;] ldlm_cli_pool_recalc+0x231/0x240 [ptlrpc]
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffffc0bd27e9&amp;gt;] ldlm_pool_recalc+0x109/0x1d0 [ptlrpc]
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffffc0bd40a4&amp;gt;] ldlm_pools_recalc+0x224/0x3d0 [ptlrpc]
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffffc0bd42e5&amp;gt;] ldlm_pools_thread_main+0x95/0x330 [ptlrpc]
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffff810c6440&amp;gt;] ? wake_up_state+0x20/0x20
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffffc0bd4250&amp;gt;] ? ldlm_pools_recalc+0x3d0/0x3d0 [ptlrpc]
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffff810b252f&amp;gt;] kthread+0xcf/0xe0
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffff810b2460&amp;gt;] ? insert_kthread_work+0x40/0x40
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffff816b8798&amp;gt;] ret_from_fork+0x58/0x90
Jan 11 02:41:03 spirit-14 kernel: [&amp;lt;ffffffff810b2460&amp;gt;] ? insert_kthread_work+0x40/0x40
Jan 11 02:41:03 spirit-14 kernel: Code: 0d 48 98 83 e2 30 48 81 c2 c0 95 01 00 48 03 14 c5 20 8e b1 81 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 &amp;lt;85&amp;gt; c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;In &lt;tt&gt;ldlm_prepare_lru_list()&lt;/tt&gt; we used to skip locks with &lt;tt&gt;lock-&amp;gt;l_last_used == cfs_time_current()&lt;/tt&gt;. But this was removed in &lt;a href=&quot;https://review.whamcloud.com/#/c/28370/10/lustre/ldlm/ldlm_request.c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/28370/10/lustre/ldlm/ldlm_request.c&lt;/a&gt; when we switched from tracking &lt;tt&gt;l_last_used&lt;/tt&gt; in seconds to nanoseconds. Now we check &lt;tt&gt;lock-&amp;gt;l_last_used&lt;/tt&gt; later in the policy function but only after releasing &lt;tt&gt;ns-&amp;gt;ns_lock&lt;/tt&gt;. I was wondering if removing the &lt;tt&gt;l_last_used&lt;/tt&gt; check from the inner loop together with concurrent calls to &lt;tt&gt;ldlm_prepare_lru_list()&lt;/tt&gt; from other threads was adding enough contention on &lt;tt&gt;ns-&amp;gt;ns_lock&lt;/tt&gt; to cause the soft lockups.&lt;/p&gt;</description>
                <environment></environment>
        <key id="50326">LU-10537</key>
            <summary>softlockup in ldlm_prepare_lru_list()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="jhammond">John Hammond</reporter>
                        <labels>
                    </labels>
                <created>Fri, 19 Jan 2018 22:11:02 +0000</created>
                <updated>Wed, 5 Aug 2020 21:19:19 +0000</updated>
                            <resolved>Wed, 5 Aug 2020 21:19:19 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="276765" author="adilger" created="Wed, 5 Aug 2020 21:19:14 +0000"  >&lt;p&gt;probably a dupe of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11075&quot; title=&quot;suspicious code in ldlm_prepare_lru_list()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11075&quot;&gt;&lt;del&gt;LU-11075&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="44868">LU-9230</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="52525">LU-11075</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzrev:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>