<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:03:06 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6770] use per_cpu request pool osc_rq_pools</title>
                <link>https://jira.whamcloud.com/browse/LU-6770</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt; With many OSCs, the osc will pre-alloc memory at start.&lt;br/&gt;
  That will occupy the memory of application, especially when the&lt;br/&gt;
  client need to interact with hundreds of OSTs.&lt;/p&gt;

&lt;p&gt;    We can solve it by using a global per_cpu pool &apos;osc_rq_pools&apos; rather than&lt;br/&gt;
    local pool for per osc to change this situation. The upper limit&lt;br/&gt;
    size of requests in pools is about 1 percent of the total memory.&lt;/p&gt;

&lt;p&gt;    Also, administrator can use a module parameter to limit the memory&lt;br/&gt;
    usage by:&lt;br/&gt;
        options osc osc_reqpool_mem_max=num&lt;br/&gt;
    The unit of num is MB, and the upper limit will be:&lt;br/&gt;
        MIN(num, 1% total memory)&lt;/p&gt;</description>
                <environment></environment>
        <key id="30841">LU-6770</key>
            <summary>use per_cpu request pool osc_rq_pools</summary>
                <type id="4" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11310&amp;avatarType=issuetype">Improvement</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="ys">Yang Sheng</assignee>
                                    <reporter username="wangshilong">Wang Shilong</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Sat, 27 Jun 2015 06:45:20 +0000</created>
                <updated>Fri, 4 Aug 2017 19:41:27 +0000</updated>
                            <resolved>Thu, 13 Aug 2015 02:46:26 +0000</resolved>
                                                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="119760" author="gerrit" created="Sat, 27 Jun 2015 06:49:10 +0000"  >&lt;p&gt;Wang Shilong (wshilong@ddn.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/15422&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15422&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6770&quot; title=&quot;use per_cpu request pool osc_rq_pools&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6770&quot;&gt;&lt;del&gt;LU-6770&lt;/del&gt;&lt;/a&gt; osc: use per_cpu request pool osc_rq_pools&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 8fec66ecc9871d9b0c52f7f5ce65bda3a130cc38&lt;/p&gt;</comment>
                            <comment id="119763" author="pjones" created="Sat, 27 Jun 2015 20:25:10 +0000"  >&lt;p&gt;Yang Sheng&lt;/p&gt;

&lt;p&gt;Could you please take care of this patch?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="119905" author="ys" created="Tue, 30 Jun 2015 02:33:58 +0000"  >&lt;p&gt;Hello Wang,&lt;/p&gt;

&lt;p&gt;I have reviewed the patch you provided and have started to run Lustre, with this patch, on my local system to verify that it works well.&lt;/p&gt;

&lt;p&gt;Once I have completed my testing I will work with you to get the patch reviewed by others, so that it can be landed.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
Yang Sheng.&lt;/p&gt;</comment>
                            <comment id="120532" author="ys" created="Tue, 7 Jul 2015 07:40:56 +0000"  >&lt;p&gt;The patch has passed test &amp;amp; review process. But Oleg has some comment as below:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[CST&#19979;&#21320;1&#26102;53&#20998;29&#31186;] yang sheng: could you please give me some point about http://review.whamcloud.com/#/c/15422/
[CST&#19979;&#21320;1&#26102;54&#20998;01&#31186;] yang sheng: can it be landed or still need waiting a while?
[CST&#19979;&#21320;1&#26102;54&#20998;07&#31186;] Oleg Drokin: why do we need percpu pool there?
[CST&#19979;&#21320;1&#26102;54&#20998;28&#31186;] Oleg Drokin: I mean it&apos;s still an improvement, but what if I have 260 CPUs?
[CST&#19979;&#21320;1&#26102;54&#20998;40&#31186;] Oleg Drokin: I would imagine havign a static pool of a fixed size is probably best of all
[CST&#19979;&#21320;1&#26102;57&#20998;45&#31186;] Oleg Drokin: I think the pool does not need to be super big. Just a fixed number of reqests, something like 50 (or 100, need to see how big they are) should be enough. we only expect to use them during severe OOM anyway
[CST&#19979;&#21320;1&#26102;58&#20998;00&#31186;] Oleg Drokin: with perhaps a module parameter if somebody wants an override
[CST&#19979;&#21320;1&#26102;59&#20998;05&#31186;] yang sheng: Yes, it is reasonable.
[CST&#19979;&#21320;1&#26102;59&#20998;44&#31186;] yang sheng: as this patch given the limit is %1 of total memory.
[CST&#19979;&#21320;2&#26102;00&#20998;16&#31186;] yang sheng: seem big than you are point.
[CST&#19979;&#21320;2&#26102;01&#20998;19&#31186;] Oleg Drokin: Yes. I feel it&apos;s really excessive. But the initial reasoning was that every OSC could have up to 32M of dirty pages and can send up to 8 (default) RPCs in flight.
[CST&#19979;&#21320;2&#26102;01&#20998;40&#31186;] Oleg Drokin: so every OSC had this pool in order to send the many RPCs even in OOM
[CST&#19979;&#21320;2&#26102;02&#20998;02&#31186;] Oleg Drokin: in reality if you have 2000 OSTs, it&apos;s unlikely you&apos;d have dirty pages in all of them at the same time
[CST&#19979;&#21320;2&#26102;02&#20998;11&#31186;] Oleg Drokin: so we need to be reasonable here
[CST&#19979;&#21320;2&#26102;02&#20998;38&#31186;] Oleg Drokin: 1% of 1T of memory is still a cool 10G
[CST&#19979;&#21320;2&#26102;03&#20998;34&#31186;] yang sheng: so a fixed size is enough to handle such stiuation.
[CST&#19979;&#21320;2&#26102;03&#20998;46&#31186;] Oleg Drokin: finding a proper number is going to be tricky, but I feel it should be on the lower side somewhere in tens or low hundreds for most cases except perhaps the most extreme ones
[CST&#19979;&#21320;2&#26102;04&#20998;10&#31186;] Oleg Drokin: that&apos;s why having an override is important of course, with a good documentation about it like I explained above
[CST&#19979;&#21320;2&#26102;05&#20998;02&#31186;] yang sheng: I see. got it. Thank you very much. Oleg.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="120534" author="wangshilong" created="Tue, 7 Jul 2015 08:48:18 +0000"  >&lt;p&gt;Hi Yang Sheng,&lt;/p&gt;

&lt;p&gt;So maybe we can set @osc_reqpool_mem_max=100MB or so in default, and it will try to allocate memory by checking&lt;br/&gt;
min(100M&#65292; 1% of memory), dose this make sense for you?&lt;/p&gt;

&lt;p&gt;Best Regards,&lt;br/&gt;
Shilong&lt;/p&gt;</comment>
                            <comment id="120670" author="ys" created="Wed, 8 Jul 2015 04:15:03 +0000"  >&lt;p&gt;Hi, Shilong,&lt;/p&gt;

&lt;p&gt;I think maybe we still need consider OST number when we decide the mem_max parameter.  Unless it big than a fixed number or override by module paramter. How do you think about it?&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
Yang Sheng&lt;/p&gt;</comment>
                            <comment id="121146" author="gerrit" created="Mon, 13 Jul 2015 15:06:54 +0000"  >&lt;p&gt;Li Xi (lixi@ddn.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/15585&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15585&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6770&quot; title=&quot;use per_cpu request pool osc_rq_pools&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6770&quot;&gt;&lt;del&gt;LU-6770&lt;/del&gt;&lt;/a&gt; osc: use global osc_rq_pool to reduce memory usage&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: c22aa4d8c553e974214a27e516728d88df73663c&lt;/p&gt;</comment>
                            <comment id="123321" author="ihara" created="Wed, 5 Aug 2015 13:56:57 +0000"  >&lt;p&gt;patch &lt;a href=&quot;http://review.whamcloud.com/15585&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15585&lt;/a&gt; Abandoned. new patch is &lt;a href=&quot;http://review.whamcloud.com/#/c/15422/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/15422/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="123328" author="ihara" created="Wed, 5 Aug 2015 14:12:56 +0000"  >&lt;p&gt;Here is quick benchmark results on master with/without &lt;a href=&quot;http://review.whamcloud.com/#/c/15422&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/15422&lt;/a&gt;&lt;br/&gt;
4 x OSS and an client(2 x E5-2660v3, 20 CPU cores, 128GB memory and 1 x FDR Infiniband)&lt;/p&gt;

&lt;div class=&apos;table-wrap&apos;&gt;
&lt;table class=&apos;confluenceTable&apos;&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&amp;nbsp;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master(w/o stress)&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master(w/ stress)&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master+15422(w/o stress)&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master+15422(w/ stress)&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;Write(M/B/sec) &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;5604&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;4838&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;5702&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;4846&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;Read(M/B/sec) &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;4218&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;3703&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;4261&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;3939&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;


&lt;p&gt;Here is IOR syntax on this test.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# mpirun -np 10 /work/ihara/IOR -w -e -t 1m -b 26g -k -F -o /scratch1/file
# pdsh -g oss,client &quot;sync; echo 3 &amp;gt; /proc/sys/vm/drop_caches&quot;
# mpirun -np 10 /work/ihara/IOR -r -e -t 1m -b 26g -F -o /scratch1/file
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;For IOR with stress testing, I generated memory pressure with &quot;stress&quot; command and ran IOR  under it.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# stress --vm 10 --vm-bytes 10G
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;No perforamnce regression with patch 15422 so far.&lt;/p&gt;</comment>
                            <comment id="123335" author="ihara" created="Wed, 5 Aug 2015 14:40:50 +0000"  >&lt;p&gt;This is memory usage on client with 200 OSTs configuation.&lt;/p&gt;

&lt;div class=&apos;table-wrap&apos;&gt;
&lt;table class=&apos;confluenceTable&apos;&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&amp;nbsp;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;Mem usage&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;Slab allocation&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 444616&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 136412&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master+15422&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 91324 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;64412 &lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;


&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Mem usage=MemFree(Before mount) - MemFree(after mount)
Slab allocation=Slab(After mount) - Slab(Before mount)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Patch 15422 helps to reduce significant memory usages.&lt;/p&gt;</comment>
                            <comment id="124003" author="gerrit" created="Wed, 12 Aug 2015 23:44:40 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/15422/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15422/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6770&quot; title=&quot;use per_cpu request pool osc_rq_pools&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6770&quot;&gt;&lt;del&gt;LU-6770&lt;/del&gt;&lt;/a&gt; osc: use global osc_rq_pool to reduce memory usage&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 44c4f47c4d1f185831d4629cc9ca5ae5f50a8e07&lt;/p&gt;</comment>
                            <comment id="124017" author="ys" created="Thu, 13 Aug 2015 02:46:26 +0000"  >&lt;p&gt;Patch landed, Close this ticket.&lt;/p&gt;</comment>
                            <comment id="126659" author="gerrit" created="Tue, 8 Sep 2015 16:02:24 +0000"  >&lt;p&gt;Wrong ticket number.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxgqn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>