<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:30:21 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-9906] Allow Lustre page dropping to use pagevec_release</title>
                <link>https://jira.whamcloud.com/browse/LU-9906</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When Lustre releases a lot of cached pages at once, it still calls page_release, instead of pagevec_release.  When clearing OST ldlm lock lrus, the ldlm_bl threads end up spending much of their time contending for the zone lock taken by page_release.&lt;/p&gt;

&lt;p&gt;With many namespaces and parallel lru clearing (as Cray does at the end of each job), this can be a significant time sink.  Using pagevec release is much better.  Patch coming shortly.&lt;/p&gt;</description>
                <environment></environment>
        <key id="47927">LU-9906</key>
            <summary>Allow Lustre page dropping to use pagevec_release</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="paf">Patrick Farrell</assignee>
                                    <reporter username="paf">Patrick Farrell</reporter>
                        <labels>
                            <label>performance</label>
                    </labels>
                <created>Wed, 23 Aug 2017 19:52:59 +0000</created>
                <updated>Thu, 19 Dec 2019 00:55:07 +0000</updated>
                            <resolved>Wed, 21 Nov 2018 05:35:42 +0000</resolved>
                                                    <fixVersion>Lustre 2.12.0</fixVersion>
                    <fixVersion>Lustre 2.10.7</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="206203" author="gerrit" created="Wed, 23 Aug 2017 19:58:35 +0000"  >&lt;p&gt;Patrick Farrell (paf@cray.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/28667&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28667&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt; clio: use pagevec_release for many pages&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: da6033fa0d989c5a8ff5a0bf9d1a8d1f4350a0b1&lt;/p&gt;</comment>
                            <comment id="209121" author="paf" created="Thu, 21 Sep 2017 18:33:24 +0000"  >&lt;p&gt;Quoting Andreas in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9920&quot; title=&quot;Use pagevec for marking pages dirty&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9920&quot;&gt;&lt;del&gt;LU-9920&lt;/del&gt;&lt;/a&gt;:&lt;br/&gt;
&quot;Patrick, a similar issue exists when pages are dropped from cache upon lock cancellation. It would be useful to clean this up to use invalidate_page_range() or similar to drop pages from cache (at least in stripe_size chunks) instead of doing it one page at a time as it does today.&quot;&lt;/p&gt;

&lt;p&gt;Invalidate_page_range is something else, but I think this does what you&apos;re talking about.  I don&apos;t think we can drop pages in such large chunks, pagevec_release is the best I&apos;m aware of without writing our own.  (And I wonder about holding the relevant lock long enough to drop stripe_size chunks.)&lt;/p&gt;</comment>
                            <comment id="216248" author="gerrit" created="Thu, 14 Dec 2017 03:30:36 +0000"  >&lt;p&gt;Patrick Farrell (paf@cray.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/30531&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/30531&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt; osd: use pagevec for putting pages&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: f48a740f3bad58ffda44d268454775a4fd26d5a6&lt;/p&gt;</comment>
                            <comment id="220933" author="gerrit" created="Wed, 14 Feb 2018 00:52:30 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/30531/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/30531/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt; osd: use pagevec for putting pages&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 2a2adfd04245a24148d8de29b8558cd98c92bffa&lt;/p&gt;</comment>
                            <comment id="237028" author="sihara" created="Thu, 15 Nov 2018 13:52:59 +0000"  >&lt;p&gt;patch &lt;a href=&quot;https://review.whamcloud.com/#/c/28667&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/28667&lt;/a&gt; gives huge contributions for single client performance improvements.&lt;br/&gt;
 In fact, today, there is a single performance b/w limit if network b/w is higher than IB EDR bandwidth. (e.g. 2 x IB EDR with MR on client)&lt;br/&gt;
 This is not an LNET/MR problem; we confirmed it is caused by the overhead of lru reclaim in CLIO.&lt;br/&gt;
 Using pagevec for lru reclaim in addition to the original patch 28667 shows 32% write and ~60% read performance gains.&lt;/p&gt;

&lt;p&gt;Here are the test results. &lt;br/&gt;
 I&apos;ve tested with both 1MB buffered IO and 16MB O_DIRECT to make sure there is no LNET/MR issue and that network bandwidth can be saturated without the IO path of buffered IO.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;1 x client (2 x Intel Platinum 8160 CPU @ 2.10GHz, 192GB Memory)

parameter
lctl set_param osc.*.max_pages_per_rpc=16M osc.*.max_rpcs_in_flight=16 osc.*.max_dirty_mb=512 osc.*.checksums=0 llite.*.max_read_ahead_mb=2048

IOR command
mpirun -np 48 ior -w -r -t 16m -b 16g -F -e -vv -o /scratch0/file -i 1 -B (O_DIRECT)
mpirun -np 48 ior -w -r -t 16m -b 16g -F -e -vv -o /scratch0/file -i 1 (buffered)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&apos;table-wrap&apos;&gt;
&lt;table class=&apos;confluenceTable&apos;&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;th class=&apos;confluenceTh&apos;&gt;&#160;&lt;/th&gt;
&lt;th class=&apos;confluenceTh&apos;&gt;mode&lt;/th&gt;
&lt;th class=&apos;confluenceTh&apos;&gt;write(GB/s)&lt;/th&gt;
&lt;th class=&apos;confluenceTh&apos;&gt;read(GB/s)&lt;/th&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;O_DIRECT&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;20.8&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;21.8&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master+patch28667&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;O_DIRECT&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;20.7&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;22.2&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;Buffered&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;11.6&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;12.3&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;master+patch28667&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;Buffered&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;15.3&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;19.6&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;
</comment>
                            <comment id="237037" author="paf" created="Thu, 15 Nov 2018 16:53:50 +0000"  >&lt;p&gt;That&apos;s &lt;b&gt;really&lt;/b&gt; impressive.&lt;/p&gt;

&lt;p&gt;What kernel version are you running there?&#160; I&apos;m curious specifically if you have queued spinlocks.&#160; I haven&apos;t looked at lru_reclaim specifically, but the other areas affected by this patch got much better with new kernel versions.&#160; (ie the patch is less important if you have queued spinlocks)&lt;/p&gt;</comment>
                            <comment id="237043" author="sihara" created="Thu, 15 Nov 2018 17:48:05 +0000"  >&lt;p&gt;i&apos;m testing on 3.10.0-693.21.1.el7.x86_64.&lt;br/&gt;
please see the two attached flamegraphs for ior read.&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/31475/master-read.svg&quot; class=&quot;external-link&quot; rel=&quot;nofollow&quot;&gt;https://jira.whamcloud.com/secure/attachment/31475/master-read.svg&lt;/a&gt; (without patch)&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/31474/master-patch28667-read.svg&quot; class=&quot;external-link&quot; rel=&quot;nofollow&quot;&gt;https://jira.whamcloud.com/secure/attachment/31474/master-patch28667-read.svg&lt;/a&gt; (with patch 28667)&lt;/p&gt;

&lt;p&gt;cost reduction at discard_pagevec() is from 57.59% to 17.48% after patch.&lt;/p&gt;</comment>
                            <comment id="237046" author="paf" created="Thu, 15 Nov 2018 18:07:01 +0000"  >&lt;p&gt;Huh!&#160; Thank you for the detailed look.&#160; I am surprised it&apos;s so large with the queued spinlocks, but I&apos;m glad it&apos;s helping so much.&#160; Nice find.&lt;/p&gt;</comment>
                            <comment id="237069" author="adilger" created="Thu, 15 Nov 2018 22:42:02 +0000"  >&lt;p&gt;This is great.  It shows that the performance is nearly identical for buffered and unbuffered large reads. &lt;/p&gt;

&lt;p&gt;It would seem like the next big user is &lt;tt&gt;osc_lru_alloc()&lt;/tt&gt;, but it may be that it only looks like it is taking a lot of time because there is an enforced wait when there are not enough pages.  Given that we are very close to peak performance for the reads, it probably makes more sense to focus on improving the write side.&lt;/p&gt;</comment>
                            <comment id="237300" author="gerrit" created="Wed, 21 Nov 2018 04:05:27 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/28667/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28667/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt; clio: use pagevec_release for many pages&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: b4a959eb61bc7e6a64261c704f3f3f5e220c2f02&lt;/p&gt;</comment>
                            <comment id="237312" author="pjones" created="Wed, 21 Nov 2018 05:35:42 +0000"  >&lt;p&gt;Landed for 2.12&lt;/p&gt;</comment>
                            <comment id="239557" author="gerrit" created="Tue, 8 Jan 2019 18:34:11 +0000"  >&lt;p&gt;Minh Diep (mdiep@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/33988&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/33988&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt; osd: use pagevec for putting pages&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e380923f87494519f8a9281ace0c53054f8aab5c&lt;/p&gt;</comment>
                            <comment id="242033" author="gerrit" created="Fri, 15 Feb 2019 01:28:52 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/33988/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/33988/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt; osd: use pagevec for putting pages&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 76f01221aaf3c4a65a4f1b9af1363838921843a1&lt;/p&gt;</comment>
                            <comment id="242078" author="pfarrell" created="Fri, 15 Feb 2019 15:51:13 +0000"  >&lt;p&gt;Landing just the OSD side patch to b2_10 is good here - It was required for some kernel compatibility changes (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10565&quot; title=&quot;Handle some small features in upstream&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10565&quot;&gt;&lt;del&gt;LU-10565&lt;/del&gt;&lt;/a&gt;), and is trivial.&lt;/p&gt;

&lt;p&gt;There is no need to land the other patch from this ticket - &lt;a href=&quot;https://review.whamcloud.com/28667/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28667/&lt;/a&gt;&#160;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt;&#160;clio: use pagevec_release for many pages.&#160; The two patches here are independent, and the clio one is non-trivial.&#160; Not a good candidate for a maintenance branch.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="47978">LU-9920</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="31474" name="master-patch28667-read.svg" size="280171" author="sihara" created="Thu, 15 Nov 2018 17:45:45 +0000"/>
                            <attachment id="31475" name="master-read.svg" size="214526" author="sihara" created="Thu, 15 Nov 2018 17:45:42 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzziw7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>