<?xml version="1.0" encoding="UTF-8"?>
<!--
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:13:44 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7997] RCU stalls waiting for lu_sites_guard mutex in lu_cache_shrink_count</title>
                <link>https://jira.whamcloud.com/browse/LU-7997</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;The lu_cache_shrink_count algorithm introduced by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6365&quot; title=&quot;Eliminate unnecessary loop in lu_cache_shrink to improve performance&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6365&quot;&gt;&lt;del&gt;LU-6365&lt;/del&gt;&lt;/a&gt; does not scale well as the number of processors increases. In low memory conditions, many processes calling into lu_cache_shrink concurrently trigger RCU stalls. Most of the processes are waiting on the lu_sites_guard mutex. The process holding the mutex is executing in ls_stats_read.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;c0-0c1s14n0 INFO: rcu_sched self-detected stall on CPU { 201}  (t=15000 jiffies g=111081 c=111080 q=22545)
c0-0c1s14n0 INFO: rcu_sched self-detected stall on CPU { 175}  (t=15000 jiffies g=111081 c=111080 q=22545)
c0-0c1s14n0 INFO: rcu_sched self-detected stall on CPU { 116}  (t=15000 jiffies g=111081 c=111080 q=22545)
c0-0c1s14n0 INFO: rcu_sched self-detected stall on CPU { 253}  (t=15000 jiffies g=111081 c=111080 q=22545)
c0-0c1s14n0 INFO: rcu_sched self-detected stall on CPU { 194}  (t=15000 jiffies g=111081 c=111080 q=22545)
c0-0c1s14n0 INFO: rcu_sched self-detected stall on CPU { 21}  (t=15000 jiffies g=111081 c=111080 q=22545)
c0-0c1s14n0 INFO: rcu_sched self-detected stall on CPU { 207}  (t=15000 jiffies g=111081 c=111080 q=22545)
c0-0c1s14n0 INFO: rcu_sched self-detected stall on CPU { 230}  (t=60004 jiffies g=111081 c=111080 q=22552)
c0-0c1s14n0 INFO: rcu_sched detected stalls on CPUs/tasks: { 230} (detected by 265, t=60005 jiffies, g=111081, c=111080, q=22552)


c0-0c1s14n0 CPU: 182 PID: 47501 Comm: mem_seg_registe Tainted: P           O  3.12.51-52.31.1_1.0000.9069-cray_ari_c #1
c0-0c1s14n0 RIP: 0010:[&amp;lt;ffffffffa04f5a51&amp;gt;]  [&amp;lt;ffffffffa04f5a51&amp;gt;] lprocfs_stats_collect+0xb1/0x180 [obdclass]
c0-0c1s14n0 Call Trace:
c0-0c1s14n0 [&amp;lt;ffffffffa05188d9&amp;gt;] ls_stats_read+0x19/0x50 [obdclass]
c0-0c1s14n0 [&amp;lt;ffffffffa051a66c&amp;gt;] lu_cache_shrink_count+0x5c/0x120 [obdclass]
c0-0c1s14n0 [&amp;lt;ffffffff81132c45&amp;gt;] shrink_slab_node+0x45/0x290
c0-0c1s14n0 [&amp;lt;ffffffff8113393b&amp;gt;] shrink_slab+0x8b/0x160
c0-0c1s14n0 [&amp;lt;ffffffff81136d9f&amp;gt;] do_try_to_free_pages+0x33f/0x4a0
c0-0c1s14n0 [&amp;lt;ffffffff81136fbf&amp;gt;] try_to_free_pages+0xbf/0x150
c0-0c1s14n0 [&amp;lt;ffffffff8112b205&amp;gt;] __alloc_pages_nodemask+0x6a5/0xb00
c0-0c1s14n0 [&amp;lt;ffffffff8116ab80&amp;gt;] alloc_pages_vma+0xa0/0x180
c0-0c1s14n0 [&amp;lt;ffffffff8114c6ea&amp;gt;] handle_mm_fault+0x8ba/0xb60
c0-0c1s14n0 [&amp;lt;ffffffff8114caf6&amp;gt;] __get_user_pages+0x166/0x5b0
c0-0c1s14n0 [&amp;lt;ffffffff8114cf92&amp;gt;] get_user_pages+0x52/0x60
c0-0c1s14n0 [&amp;lt;ffffffff8103f182&amp;gt;] get_user_pages_fast+0xb2/0x1b0
c0-0c1s14n0 [&amp;lt;ffffffffa019b23d&amp;gt;] kgni_mem_set_pages+0xfd/0x1710 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffffa019c8a5&amp;gt;] kgni_mem_register_pin_pages+0x55/0x2f0 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffffa019d850&amp;gt;] kgni_mem_seg_register_pin+0xd10/0x1520 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffffa01a02ee&amp;gt;] kgni_mem_register+0x158e/0x3160 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffffa01d1ab2&amp;gt;] kgni_ioctl+0xd02/0x1520 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffff8119476d&amp;gt;] do_vfs_ioctl+0x2dd/0x4b0
c0-0c1s14n0 [&amp;lt;ffffffff81194985&amp;gt;] SyS_ioctl+0x45/0x80
c0-0c1s14n0 [&amp;lt;ffffffff8149faf2&amp;gt;] system_call_fastpath+0x16/0x1b
c0-0c1s14n0 [&amp;lt;000000002013d7a7&amp;gt;] 0x2013d7a6


c0-0c1s14n0 NMI backtrace for cpu 116
c0-0c1s14n0 CPU: 116 PID: 47508 Comm: mem_seg_registe Tainted: P           O  3.12.51-52.31.1_1.0000.9069-cray_ari_c #1
c0-0c1s14n0 RIP: 0010:[&amp;lt;ffffffff810895fa&amp;gt;]  [&amp;lt;ffffffff810895fa&amp;gt;] osq_lock+0x5a/0xb0
c0-0c1s14n0 Call Trace:
c0-0c1s14n0 [&amp;lt;ffffffff8149614a&amp;gt;] __mutex_lock_slowpath+0x5a/0x1a0
c0-0c1s14n0 [&amp;lt;ffffffff814962a7&amp;gt;] mutex_lock+0x17/0x27
c0-0c1s14n0 [&amp;lt;ffffffffa051a636&amp;gt;] lu_cache_shrink_count+0x26/0x120 [obdclass]
c0-0c1s14n0 [&amp;lt;ffffffff81132c45&amp;gt;] shrink_slab_node+0x45/0x290
c0-0c1s14n0 [&amp;lt;ffffffff8113393b&amp;gt;] shrink_slab+0x8b/0x160
c0-0c1s14n0 [&amp;lt;ffffffff81136d9f&amp;gt;] do_try_to_free_pages+0x33f/0x4a0
c0-0c1s14n0 [&amp;lt;ffffffff81136fbf&amp;gt;] try_to_free_pages+0xbf/0x150
c0-0c1s14n0 [&amp;lt;ffffffff8112b205&amp;gt;] __alloc_pages_nodemask+0x6a5/0xb00
c0-0c1s14n0 [&amp;lt;ffffffff8116ab80&amp;gt;] alloc_pages_vma+0xa0/0x180
c0-0c1s14n0 [&amp;lt;ffffffff8114c6ea&amp;gt;] handle_mm_fault+0x8ba/0xb60
c0-0c1s14n0 [&amp;lt;ffffffff8114caf6&amp;gt;] __get_user_pages+0x166/0x5b0
c0-0c1s14n0 [&amp;lt;ffffffff8114cf92&amp;gt;] get_user_pages+0x52/0x60
c0-0c1s14n0 [&amp;lt;ffffffff8103f182&amp;gt;] get_user_pages_fast+0xb2/0x1b0
c0-0c1s14n0 [&amp;lt;ffffffffa019b23d&amp;gt;] kgni_mem_set_pages+0xfd/0x1710 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffffa019c8a5&amp;gt;] kgni_mem_register_pin_pages+0x55/0x2f0 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffffa019d850&amp;gt;] kgni_mem_seg_register_pin+0xd10/0x1520 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffffa01a02ee&amp;gt;] kgni_mem_register+0x158e/0x3160 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffffa01d1ab2&amp;gt;] kgni_ioctl+0xd02/0x1520 [kgni_ari]
c0-0c1s14n0 [&amp;lt;ffffffff8119476d&amp;gt;] do_vfs_ioctl+0x2dd/0x4b0
c0-0c1s14n0 [&amp;lt;ffffffff81194985&amp;gt;] SyS_ioctl+0x45/0x80
c0-0c1s14n0 [&amp;lt;ffffffff8149faf2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;As the number of cpus grows, the summing of the LU_SS_LRU_LEN counters is not significantly faster than summing counters across hash buckets, as was done prior to the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6365&quot; title=&quot;Eliminate unnecessary loop in lu_cache_shrink to improve performance&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6365&quot;&gt;&lt;del&gt;LU-6365&lt;/del&gt;&lt;/a&gt; patch. Processes needing memory bottleneck waiting to get the lu_sites_guard mutex.&lt;/p&gt;

&lt;p&gt;The proposed solution is a two pronged attack: &lt;/p&gt;

&lt;p&gt;1. Reduce the time spent getting the object count by replacing&lt;br/&gt;
 the LU_SS_LRU_LEN counter in lu_sites.stats with a kernel percpu_counter. This shifts the overhead of summing across the cpus from lu_cache_shrink_count to the functions that increment/decrement the counter. The summing is only done when an individual cpu count exceeds a threshold so the overhead along the increment/decrement paths is minimized. lu_cache_shrink_count  may return a stale value but this is acceptable for the purposes of a shrinker. (Using the kernel&apos;s percpu_counter was also proposed as an improvement to the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6365&quot; title=&quot;Eliminate unnecessary loop in lu_cache_shrink to improve performance&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6365&quot;&gt;&lt;del&gt;LU-6365&lt;/del&gt;&lt;/a&gt; patch.)&lt;/p&gt;

&lt;p&gt;2. Increase concurrent access to the lu_sites list by changing the lu_sites_guard lock from a mutex to a read/write semaphore.&lt;br/&gt;
lu_cache_shrink_count simply reads data so it does not need to wait for other readers. lu_cache_shrink_scan, which actually frees the unused objects, is still serialized.&lt;/p&gt;</description>
                <environment>Lustre 2.7.0 clients on SLES12</environment>
        <key id="35976">LU-7997</key>
            <summary>RCU stalls waiting for lu_sites_guard mutex in lu_cache_shrink_count</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="amk">Ann Koehler</assignee>
                                    <reporter username="amk">Ann Koehler</reporter>
                        <labels>
                    </labels>
                <created>Thu, 7 Apr 2016 21:45:05 +0000</created>
                <updated>Tue, 15 Aug 2017 12:06:51 +0000</updated>
                            <resolved>Wed, 15 Jun 2016 13:16:06 +0000</resolved>
                                    <version>Lustre 2.7.0</version>
                                    <fixVersion>Lustre 2.9.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="148186" author="gerrit" created="Thu, 7 Apr 2016 22:08:03 +0000"  >&lt;p&gt;Ann Koehler (amk@cray.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19390&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19390&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7997&quot; title=&quot;RCU stalls waiting for lu_sites_guard mutex in lu_cache_shrink_count&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7997&quot;&gt;&lt;del&gt;LU-7997&lt;/del&gt;&lt;/a&gt; obd: RCU stalls in lu_cache_shrink_count()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 74d033dc90d22bf26d4c10cc6f5d19933669a0c2&lt;/p&gt;</comment>
                            <comment id="148240" author="amk" created="Fri, 8 Apr 2016 16:11:49 +0000"  >&lt;p&gt;CentOS 7 has added a GFP flag parameter to the percpu_counter_init function. I don&apos;t have ready access to a CentOS 7 system. Could someone from Intel make the necessary code changes?&lt;/p&gt;</comment>
                            <comment id="148323" author="adilger" created="Sun, 10 Apr 2016 05:52:36 +0000"  >&lt;p&gt;Ann, I commented in the patch about how to fix the &lt;tt&gt;percpu_counter_init()&lt;/tt&gt; function for RHEL7.  There is also a patch from Alex &lt;a href=&quot;http://review.whamcloud.com/19082&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19082&lt;/a&gt; that also optimizes this same code path to reduce overhead in lu_cache.&lt;/p&gt;

&lt;p&gt;I&apos;d be willing to land your current patch (with updates for RHEL7) to resolve the immediate problems you are seeing.  That said, I think it also makes sense to look at the &lt;tt&gt;lprocfs_counter_&amp;#42;&lt;/tt&gt; interfaces and update them to use &lt;tt&gt;percpu_&amp;#42;&lt;/tt&gt; interfaces (internally at least) that are available in the kernel, instead of completely rolling our own.  That is also work that is useful to have done to clean up the upstream kernel client.&lt;/p&gt;

&lt;p&gt;There is even work in the upstream kernel to add &lt;tt&gt;percpu_stats_&amp;#42;&lt;/tt&gt; interfaces that would make this transition even simpler (&lt;a href=&quot;https://lkml.org/lkml/2016/4/8/397&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://lkml.org/lkml/2016/4/8/397&lt;/a&gt;), but they aren&apos;t yet landed to mainline.  One option would be to copy these new interfaces for older vendor kernels used by Lustre until a configure check shows them to be available.&lt;/p&gt;</comment>
                            <comment id="148406" author="amk" created="Mon, 11 Apr 2016 15:43:09 +0000"  >&lt;p&gt;Andreas, Thank you for all your comments. &lt;/p&gt;

&lt;p&gt;I made the change needed for RHEL7 and just submitted a new patch version. I&apos;m relying on Maloo for testing.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/19082&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19082&lt;/a&gt; applies to the free object phase of the shrinker. All our problems have been in counting the number of freeable objects. Whenever I&apos;ve dug this count out of dumps or traced the value, it has been 0. I don&apos;t think the lu_cache shrinker frees anything very often or at least I haven&apos;t been able to figure out the use case where it does. Nevertheless the stalls we see occur when there is nothing to free.  So improvements are needed in count rather than purge.&lt;/p&gt;

&lt;p&gt;I agree with you that replacing the lprocfs_counter_* interfaces is a good idea. How about a new bug for that work? With my current patch, I was focused only on improving the changes I did for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6365&quot; title=&quot;Eliminate unnecessary loop in lu_cache_shrink to improve performance&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6365&quot;&gt;&lt;del&gt;LU-6365&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="155588" author="gerrit" created="Tue, 14 Jun 2016 03:46:34 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/19390/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19390/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7997&quot; title=&quot;RCU stalls waiting for lu_sites_guard mutex in lu_cache_shrink_count&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7997&quot;&gt;&lt;del&gt;LU-7997&lt;/del&gt;&lt;/a&gt; obd: RCU stalls in lu_cache_shrink_count()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 73c54401dad20b9cb927b68d0f1068a2bd8f9319&lt;/p&gt;</comment>
                            <comment id="155775" author="jgmitter" created="Wed, 15 Jun 2016 13:16:06 +0000"  >&lt;p&gt;patch has landed to master for 2.9.0&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="35507">LU-7896</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzy79b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>