<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:11:29 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14639] confusion of lru_size=0 if lru-resize disabled</title>
                <link>https://jira.whamcloud.com/browse/LU-14639</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;There is a configure option to enable/disable lru-resize (--enable-lru-resize/&lt;br class=&quot;atl-forced-newline&quot; /&gt;--disable-lru-resize), but it causes confusion when lru_size is set to 0.&lt;br/&gt;
 In both cases, lru_size can be set to 0, but the resulting behavior is different.&lt;/p&gt;

&lt;p&gt;Currently, if lru-resize is disabled (--disable-lru-resize) when Lustre is built, it is not possible to re-enable lru-resize, even by setting lru_size to 0.&lt;/p&gt;

&lt;p&gt;./configure --enable-lru-resize&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sky06 ~]# mount -t lustre 10.0.11.110@o2ib10:/ai7990 /ai7990
mount -t lustre 10.0.11.110@o2ib10:/ai7990 /ai7990
mount.lustre: according to /etc/mtab 10.0.11.110@o2ib10:/ai7990 is already mounted on /ai7990
[root@sky06 ~]# lctl get_param ldlm.*.*.lru_size
ldlm.namespaces.MGC10.0.11.110@o2ib10.lru_size=8800
ldlm.namespaces.ai7990-MDT0000-mdc-ffff8f5408893800.lru_size=1
ldlm.namespaces.ai7990-MDT0001-mdc-ffff8f5408893800.lru_size=0
ldlm.namespaces.ai7990-OST0000-osc-ffff8f5408893800.lru_size=0
ldlm.namespaces.ai7990-OST0001-osc-ffff8f5408893800.lru_size=0
[root@sky06 ~]# lctl set_param ldlm.*.*.lru_size=0
ldlm.namespaces.MGC10.0.11.110@o2ib10.lru_size=0
ldlm.namespaces.ai7990-MDT0000-mdc-ffff8f5408893800.lru_size=0
ldlm.namespaces.ai7990-MDT0001-mdc-ffff8f5408893800.lru_size=0
ldlm.namespaces.ai7990-OST0000-osc-ffff8f5408893800.lru_size=0
ldlm.namespaces.ai7990-OST0001-osc-ffff8f5408893800.lru_size=0
[root@sky06 ~]# time find /ai7990/testdir &amp;gt; /dev/null 2&amp;gt;&amp;amp;1

real	0m15.183s
user	0m0.367s
sys	0m6.587s
[root@sky06 ~]# lctl get_param ldlm.*.*.lru_size
ldlm.namespaces.MGC10.0.11.110@o2ib10.lru_size=0
ldlm.namespaces.ai7990-MDT0000-mdc-ffff8f5408893800.lru_size=30502
ldlm.namespaces.ai7990-MDT0001-mdc-ffff8f5408893800.lru_size=0
ldlm.namespaces.ai7990-OST0000-osc-ffff8f5408893800.lru_size=2499
ldlm.namespaces.ai7990-OST0001-osc-ffff8f5408893800.lru_size=2501
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;./configure --disable-lru-resize&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sky06 ~]# mount -t lustre 10.0.11.110@o2ib10:/ai7990 /ai7990
[root@sky06 ~]# lctl get_param ldlm.*.*.lru_size
ldlm.namespaces.MGC10.0.11.110@o2ib10.lru_size=8800
ldlm.namespaces.ai7990-MDT0000-mdc-ffff8ef7ae5ef000.lru_size=8800
ldlm.namespaces.ai7990-MDT0001-mdc-ffff8ef7ae5ef000.lru_size=8800
ldlm.namespaces.ai7990-OST0000-osc-ffff8ef7ae5ef000.lru_size=8800
ldlm.namespaces.ai7990-OST0001-osc-ffff8ef7ae5ef000.lru_size=8800
[root@sky06 ~]# lctl set_param ldlm.*.*.lru_size=0
ldlm.namespaces.MGC10.0.11.110@o2ib10.lru_size=0
ldlm.namespaces.ai7990-MDT0000-mdc-ffff8ef7ae5ef000.lru_size=0
ldlm.namespaces.ai7990-MDT0001-mdc-ffff8ef7ae5ef000.lru_size=0
ldlm.namespaces.ai7990-OST0000-osc-ffff8ef7ae5ef000.lru_size=0
ldlm.namespaces.ai7990-OST0001-osc-ffff8ef7ae5ef000.lru_size=0
[root@sky06 ~]# time find /ai7990/testdir &amp;gt; /dev/null 2&amp;gt;&amp;amp;1
 
real	0m26.491s
user	0m0.358s
sys	0m11.809s
[root@sky06 ~]# lctl get_param ldlm.*.*.lru_size
ldlm.namespaces.MGC10.0.11.110@o2ib10.lru_size=0
ldlm.namespaces.ai7990-MDT0000-mdc-ffff8ef7ae5ef000.lru_size=0
ldlm.namespaces.ai7990-MDT0001-mdc-ffff8ef7ae5ef000.lru_size=0
ldlm.namespaces.ai7990-OST0000-osc-ffff8ef7ae5ef000.lru_size=0
ldlm.namespaces.ai7990-OST0001-osc-ffff8ef7ae5ef000.lru_size=0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
</description>
                <environment>master(commit:gdfe87b0)</environment>
        <key id="63925">LU-14639</key>
            <summary>confusion of lru_size=0 if lru-resize disabled</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="sihara">Shuichi Ihara</reporter>
                        <labels>
                    </labels>
                <created>Sat, 24 Apr 2021 00:19:55 +0000</created>
                <updated>Tue, 30 May 2023 17:59:54 +0000</updated>
                                            <version>Lustre 2.15.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="367730" author="adilger" created="Wed, 29 Mar 2023 15:04:07 +0000"  >&lt;p&gt;What is expected to be the right behavior here?  &lt;tt&gt;&amp;#45;&amp;#45;disable&amp;#45;lru&amp;#45;resize&lt;/tt&gt; removed the LRU resize code completely, so only fixed-size LRU is possible. Then setting &lt;tt&gt;lru_size=0&lt;/tt&gt; results in no locks being cached on the clients. &lt;/p&gt;

&lt;p&gt;Why even build with &lt;tt&gt;&amp;#45;&amp;#45;disable&amp;#45;lru&amp;#45;resize&lt;/tt&gt; these days, instead of just setting &quot;&lt;tt&gt;lctl set_param &amp;#45;P ldlm.namespaces.&amp;lt;fsname&amp;gt;&amp;#42;.lru_size=500&lt;/tt&gt;&quot; or similar? &lt;/p&gt;

&lt;p&gt;The only option I see is to change &lt;tt&gt;&amp;#45;&amp;#45;disable&amp;#45;lru&amp;#45;resize&lt;/tt&gt; to &lt;b&gt;not&lt;/b&gt; actually disable the LRU resize code, and instead have it just set a fixed LRU size by default to prevent users from shooting themselves in the foot because they are using old instructions when building clients.  I don&apos;t &lt;em&gt;think&lt;/em&gt; this is documented anywhere, but if it is then it should be removed. &lt;/p&gt;</comment>
                            <comment id="367802" author="sihara" created="Wed, 29 Mar 2023 21:50:03 +0000"  >&lt;blockquote&gt;&lt;p&gt;What is expected to be the right behavior here? &lt;tt&gt;--disable-lru-resize&lt;/tt&gt; removed the LRU resize code completely, so only fixed-size LRU is possible. Then setting &lt;tt&gt;lru_size=0&lt;/tt&gt; results in no locks being cached on the clients.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;The problem is that &lt;tt&gt;lru_size=0&lt;/tt&gt; is configurable regardless of whether the client was built with --disable-lru-resize or --enable-lru-resize, but people can&apos;t tell which is which after setting &lt;tt&gt;lru_size=0&lt;/tt&gt;. Even with a non-zero value, it&apos;s hard to confirm. When the client unmounts and mounts Lustre again, it can be confirmed from the default value (zero or non-zero lru_size).&lt;br/&gt;
There are two totally different behaviors, but both are controlled by the same parameter and value.&lt;/p&gt;

&lt;p&gt;If client was built with &lt;tt&gt;--disable-lru-resize&lt;/tt&gt;, setting &lt;tt&gt;lru_size=0&lt;/tt&gt; shouldn&apos;t be acceptable?, but other value needs to be defined to give &quot;no locks cache&quot; meaning? e.g. &lt;tt&gt;lru_size=false&lt;/tt&gt;&lt;/p&gt;
&lt;blockquote&gt;&lt;p&gt;Why even build with &lt;tt&gt;--disable-lru-resize&lt;/tt&gt; these days, instead of just setting &quot;&lt;tt&gt;lctl set_param -P ldlm.namespaces.&amp;lt;fsname&amp;gt;*.lru_size=500&lt;/tt&gt;&quot; or similar?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;This is fine, but it still needs per client setting. client&apos;s UUID changes all time doesn&apos;t it? e.g. it would limit lru_size only for login or data mover nodes, etc.&lt;/p&gt;</comment>
                            <comment id="367813" author="adilger" created="Wed, 29 Mar 2023 22:41:48 +0000"  >&lt;p&gt;There was a discussion about having per-client tunables linked to nodemap in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11077&quot; title=&quot;Client-specific tunable parameter configuration&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11077&quot;&gt;LU-11077&lt;/a&gt;,  or possibly a client-local &lt;tt&gt;/etc/lustre/&amp;lt;fsname&amp;gt;-client.params&lt;/tt&gt; file to set parameters at mount time.   However, that has not been implemented.&lt;/p&gt;

&lt;p&gt;Shuichi, is the main goal of using &lt;tt&gt;&amp;#45;&amp;#45;disable&amp;#45;lru&amp;#45;resize&lt;/tt&gt; to have a different/static LRU size on a small number of clients (e.g. login node or data mover), or is it used for all clients in a cluster?  Or is there some other issue with LRU resize that means it should be disabled entirely from the code (e.g. jitter on compute nodes, or other reasons to disable the code completely)?  Is this option widely used for all client builds, or only in specific cases?&lt;/p&gt;

&lt;p&gt;I&apos;m wondering if the meaning of the &lt;tt&gt;&amp;#45;&amp;#45;disable&amp;#45;lru&amp;#45;resize&lt;/tt&gt; option should be changed from &lt;b&gt;removing&lt;/b&gt; the LRU resize code to just changing it to have a constant &lt;tt&gt;lru_size&lt;/tt&gt; value?  Is there really a time when &quot;&lt;tt&gt;lru_size=0&lt;/tt&gt;&quot; should mean &quot;cache zero locks&quot; (which would be terrible for performance, as you see here)?  If a client should minimize lock cache size, I can&apos;t imagine that &lt;tt&gt;lru_size=5&lt;/tt&gt; or similar would cause many issues, and would at least still allow a few files to re-use locks on the client...&lt;/p&gt;</comment>
                            <comment id="368614" author="sihara" created="Thu, 6 Apr 2023 05:13:39 +0000"  >&lt;p&gt;In many cases, the reason for using &apos;--disable-lru-resize&apos; is to limit the number of locks cached per client across the entire cluster. &lt;br/&gt;
People also clean up caches after a job finishes (e.g. by running the &apos;lctl set_param ldlm.namespaces.*.lru_size=clear&apos; command from the job scheduler as a post script).&lt;/p&gt;
&lt;blockquote&gt;&lt;p&gt;I&apos;m wondering if the meaning of the --disable-lru-resize option should be changed from removing the LRU resize code to just changing it to have a constant lru_size value?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Indeed, keeping LRU resize enabled but using a fixed value by default makes sense.&lt;br/&gt;
It&apos;s still possible to change it to 0 or to an even lower/higher value if needed. And we can also control LRU speed with the lru_max_age parameter.&lt;/p&gt;</comment>
                            <comment id="373853" author="paf0186" created="Tue, 30 May 2023 17:22:35 +0000"  >&lt;p&gt;It would be nice to drop the configure option entirely - I don&apos;t think changing defaults as a build time option is very good practice, I think it should be done at runtime with settings unless there&apos;s a reason that doesn&apos;t work.&#160; It&apos;s weird to be able to change something with both build configuration &lt;b&gt;and&lt;/b&gt; runtime options.&#160; I think it would be nice to get away from the idea of changing behavior with &lt;em&gt;build&lt;/em&gt; time flags if that behavior can also be adjusted at runtime.&lt;/p&gt;

&lt;p&gt;I&apos;m guessing though that since we have customers who are using this build option, it would be easier to just change it to set a default value, right?&lt;/p&gt;</comment>
                            <comment id="373864" author="paf0186" created="Tue, 30 May 2023 17:40:43 +0000"  >&lt;p&gt;Actually, I was just thinking about this, and:&lt;/p&gt;

&lt;p&gt;I don&apos;t think changing the build flag to set a default is a very good idea.&#160; A good default value seems very hard to pick.&#160; Like, what is a good default value for lru_size?&#160; What represents a good compromise between memory usage and performance?&#160; Etc.&#160; The &apos;correct&apos; value depends on whether it is an MDC or an OSC connection, possibly changing if DOM is in use, and on client and server memory size, etc.&#160; We have lru-resize specifically because the correct value is hard to choose - it is very system specific - so instead we choose the value dynamically.&lt;/p&gt;

&lt;p&gt;So I think instead we should encourage people who want to set a specific lru_size to &apos;do the right thing&apos; by removing the build option.&#160; They will then set the lru_size value they want in their configuration.&#160; This is what the customers using the build option are doing anyway - they built with disable-lru-resize out of caution, but they are all manually setting specific lru_size values.&#160; So the build option is never used by itself anyway.&lt;/p&gt;</comment>
                            <comment id="373866" author="gerrit" created="Tue, 30 May 2023 17:55:59 +0000"  >&lt;p&gt;&quot;Patrick Farrell &amp;lt;pfarrell@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/51165&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/51165&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14639&quot; title=&quot;confusion of lru_size=0 if lru-resize disabled&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14639&quot;&gt;LU-14639&lt;/a&gt; build: Remove disable-lru-resize config&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 3f457d586f8b01fd1f69e4d761f216535bacb4ca&lt;/p&gt;</comment>
                            <comment id="373867" author="gerrit" created="Tue, 30 May 2023 17:56:00 +0000"  >&lt;p&gt;&quot;Patrick Farrell &amp;lt;pfarrell@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/51166&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/51166&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14639&quot; title=&quot;confusion of lru_size=0 if lru-resize disabled&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14639&quot;&gt;LU-14639&lt;/a&gt; tests: remove disable-lru-resize check&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 9e8e99f4f06bdb8bf5610ee7060bd6925c5e8c0a&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="52532">LU-11077</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01t13:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>