<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:30:28 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-9920] Use pagevec for marking pages dirty</title>
                <link>https://jira.whamcloud.com/browse/LU-9920</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When doing i/o from multiple writers to a single file, the&lt;br/&gt;
per-file page cache lock (mapping-&amp;gt;tree_lock) becomes a&lt;br/&gt;
bottleneck.&lt;/p&gt;

&lt;p&gt;Most current uses are single page at a time.  This converts&lt;br/&gt;
one prominent use, marking page as dirty, to use a pagevec.&lt;/p&gt;

&lt;p&gt;This improves shared file write performance notably when&lt;br/&gt;
many threads are writing to one file.&lt;/p&gt;

&lt;p&gt;[NB: Detailed numbers coming.  Looks like no change in&lt;br/&gt;
uncontended case, rising to ~25% for 8 writers.]&lt;/p&gt;</description>
                <environment></environment>
        <key id="47978">LU-9920</key>
            <summary>Use pagevec for marking pages dirty</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="dongyang">Dongyang Li</assignee>
                                    <reporter username="paf">Patrick Farrell</reporter>
                        <labels>
                    </labels>
                <created>Fri, 25 Aug 2017 16:27:49 +0000</created>
                <updated>Thu, 22 Oct 2020 06:19:01 +0000</updated>
                            <resolved>Tue, 1 Oct 2019 03:22:10 +0000</resolved>
                                                    <fixVersion>Lustre 2.13.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="206432" author="gerrit" created="Fri, 25 Aug 2017 16:28:36 +0000"  >&lt;p&gt;Patrick Farrell (paf@cray.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/28711&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28711&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9920&quot; title=&quot;Use pagevec for marking pages dirty&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9920&quot;&gt;&lt;del&gt;LU-9920&lt;/del&gt;&lt;/a&gt; vvp: dirty pages with pagevec&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: f30d9cb22e6461f5d5b9325448740518bc46a78c&lt;/p&gt;</comment>
                            <comment id="209102" author="adilger" created="Thu, 21 Sep 2017 17:32:29 +0000"  >&lt;p&gt;Patrick, a similar issue exists when pages are dropped from cache upon lock cancellation.  It would be useful to clean this up to use &lt;tt&gt;invalidate_page_range()&lt;/tt&gt; or similar to drop pages from cache (at least in stripe_size chunks) instead of doing it one page at a time as it does today.&lt;/p&gt;</comment>
                            <comment id="209116" author="paf" created="Thu, 21 Sep 2017 18:14:21 +0000"  >&lt;p&gt;Andreas,&lt;/p&gt;

&lt;p&gt;See &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt; &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;br/&gt;
 And:&lt;br/&gt;
 &lt;a href=&quot;https://review.whamcloud.com/28667&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28667&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Unlike this patch (which is new and has not run in production), Cray has been using a version of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt; for a while.&lt;/p&gt;</comment>
                            <comment id="209119" author="paf" created="Thu, 21 Sep 2017 18:32:31 +0000"  >&lt;p&gt;Ah, sorry, you&apos;ve already seen that.  Still, I think it&apos;s the answer to your thought.&lt;/p&gt;</comment>
                            <comment id="209133" author="adilger" created="Thu, 21 Sep 2017 20:51:42 +0000"  >&lt;p&gt;You are right.  I &lt;em&gt;thought&lt;/em&gt; I&apos;d seen something similar, but when searching Jira I couldn&apos;t find &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9906&quot; title=&quot;Allow Lustre page dropping to use pagevec_release&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9906&quot;&gt;&lt;del&gt;LU-9906&lt;/del&gt;&lt;/a&gt; with the keywords I was using.  I&apos;ve now linked these tickets together.&lt;/p&gt;</comment>
                            <comment id="244669" author="pfarrell" created="Tue, 26 Mar 2019 15:59:48 +0000"  >&lt;p&gt;As discussed in the gerrit patch for this, the improvement isn&apos;t worth the amount of kernel code we have to copy (in slightly modified form).&lt;/p&gt;</comment>
                            <comment id="249123" author="gerrit" created="Wed, 12 Jun 2019 14:02:25 +0000"  >&lt;p&gt;Patrick Farrell (pfarrell@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/35206&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35206&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9920&quot; title=&quot;Use pagevec for marking pages dirty&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9920&quot;&gt;&lt;del&gt;LU-9920&lt;/del&gt;&lt;/a&gt; vvp: dirty pages with pagevec&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 2016ba18081a2c7a0efc3829b7e1f06d6054b44c&lt;/p&gt;</comment>
                            <comment id="249317" author="adilger" created="Sat, 15 Jun 2019 18:58:24 +0000"  >&lt;p&gt;Patrick, is it possible that the high lock contention is a result of something that Lustre is doing under the lock (e.g. per-page lock callbacks or something) or is it conclusive that all of the contention is in the pagecache itself?  There is an ongoing discussion in &lt;tt&gt;linux-fsdevel&lt;/tt&gt; about page cache scalability w.r.t. concurrent writers, so if there is something we could usefully contribute to the upstream kernel that would help us down the road, now would be a good time to do so.&lt;/p&gt;</comment>
                            <comment id="249321" author="pfarrell" created="Sat, 15 Jun 2019 22:24:07 +0000"  >&lt;p&gt;My word, that&apos;s quite a thread.&#160; I should&apos;ve gotten involved a few days ago.&#160; It&apos;s also degraded a bit, since Linus and Dave are ... getting along well ... in their most recent notes.&lt;/p&gt;

&lt;p&gt;I&apos;m going to gather my thoughts/explain a bit here, I know some of this should go on the list, but I&apos;ve got a lot of catching up to do before I can comment there.&lt;/p&gt;

&lt;p&gt;So our workload is lots of threads doing buffered i/o to one file*, and we&apos;re spending lots of time on spinlocks in two functions:&lt;br/&gt;
*I don&apos;t know where Kent gets the idea that multiple writers must mean buffered and direct, but that&apos;s a sampling from the middle and I have about another hundred messages to read...&lt;br/&gt;
(as seen in Ihara&apos;s flame graph &lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/32772/lustre-ssf.svg&quot; class=&quot;external-link&quot; rel=&quot;nofollow&quot;&gt;https://jira.whamcloud.com/secure/attachment/32772/lustre-ssf.svg&lt;/a&gt;)&lt;br/&gt;
__add_to_page_cache_locked&lt;br/&gt;
and:&lt;br/&gt;
__set_page_dirty_nobuffers&lt;/p&gt;

&lt;p&gt;The particular spinlock is the one that protects the page cache itself.&#160; Used to be mapping-&amp;gt;tree_lock, but it&apos;s not called a tree any more, so the lock has a new name...&#160; Same lock, though.&lt;/p&gt;

&lt;p&gt;These functions are both modifying the radix tree/xarray (the data structure used for the page cache was renamed and re-APIed&#160;in the last year &lt;span class=&quot;error&quot;&gt;&amp;#91;no functional changes&amp;#93;&lt;/span&gt;) that holds the pages for this particular file.&#160; The first is adding pages, and the second one is tagging radix tree entries as dirty.&#160; (Pages have dirty state, but the radix tree/xarray entries are also tagged as dirty, which is a separate thing, and which allows writeback (mostly writeback) to rapidly find dirty pages.&#160; It is this tagging that needs to write the tree.)&lt;/p&gt;

&lt;p&gt;In upstream, both of these operations are done a single page at a time, going back several calls.&lt;/p&gt;

&lt;p&gt;The dirty tagging side is quite simple to do with a pagevec of pages instead, and not release the lock for each page.&#160; This is still tagging one page at a time, but at least it holds the lock.&#160; That&apos;s what my patch does - Hold the lock around the loop where the pages are added.&#160; (The latest version takes advantage of Lustre specific behavior to move several operations outside of the lock, an upstream version would look more like the earlier one...&#160; But as Ihara proved, that didn&apos;t make much difference anyway.)&lt;/p&gt;

&lt;p&gt;The xarray/radix tree has the ability to tag pages in batches, but, bizarrely, it&apos;s only exposed with a function that does &quot;if have TAG X, add TAG Y&quot;, which is used for marking dirty pages for writeback.&#160; There is no way to just batch TAG X (and you can&apos;t trick the function either).&#160; It&apos;s a slightly bizarre choice and would be very easy to fix with a modification of that function.&lt;/p&gt;

&lt;p&gt;Briefly put, the adding side is notably more complex to do in batches, mostly because there are a bunch of operations that can potentially fail, and also because the write functions deal with one page at a time, so there&apos;s no obvious place to pull in the pages.&lt;/p&gt;

&lt;p&gt;The core write function in the kernel (generic_perform_write) is a big loop that&apos;s basically:&lt;br/&gt;
&quot;ask file system for page at offset X&quot;&lt;br/&gt;
(this calls .write_begin from the address space ops...)&lt;br/&gt;
&quot;copy data to that page&quot;&lt;br/&gt;
&quot;tell file system we&apos;re done&quot;&lt;br/&gt;
(this calls .write_end)&lt;br/&gt;
All one page at a time.&lt;/p&gt;

&lt;p&gt;This is the thing Kent open-coded. He doesn&apos;t use write_begin/write_end (though he&apos;s basically got versions of them).&lt;br/&gt;
He does each step in a separate loop over a bunch of pages each time, but he still gets his pages in the page cache one at a time - So if he allowed multiple writers to a file (I&apos;m guessing he doesn&apos;t based on his comments), he&apos;d crash into the same wall we&apos;re hitting. He might get better behavior because he calls the page getting stuff in a tight loop (I have a patch for that I want to play with, but it&apos;s complicated.)&lt;/p&gt;

&lt;p&gt;Like I said, I&apos;ll have to pore over that thread carefully, but the straightforward things to do would be:&lt;br/&gt;
Allow dirtying with a pagevec&lt;/p&gt;

&lt;p&gt;Allow bulk tagging in the xarray interface&lt;/p&gt;

&lt;p&gt;The basic problem with the rest is code complexity, and then getting anything else to benefit.&#160; Without multiple concurrent writers to a single file (so, range locking rather than the inode mutex), there&apos;s not really contention on managing the per-file page tracking...&#160; Any benefit experienced by anyone without that would be very small.&lt;/p&gt;</comment>
                            <comment id="255654" author="gerrit" created="Mon, 30 Sep 2019 23:12:15 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/28711/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28711/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9920&quot; title=&quot;Use pagevec for marking pages dirty&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9920&quot;&gt;&lt;del&gt;LU-9920&lt;/del&gt;&lt;/a&gt; vvp: dirty pages with pagevec&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: a7299cb012f8c5574a0cc07ff0e32218fb49d733&lt;/p&gt;</comment>
                            <comment id="255696" author="pjones" created="Tue, 1 Oct 2019 03:22:10 +0000"  >&lt;p&gt;Landed for 2.13&lt;/p&gt;</comment>
                            <comment id="282955" author="gerrit" created="Thu, 22 Oct 2020 06:19:01 +0000"  >&lt;p&gt;Jian Yu (yujian@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/40347&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40347&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9920&quot; title=&quot;Use pagevec for marking pages dirty&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9920&quot;&gt;&lt;del&gt;LU-9920&lt;/del&gt;&lt;/a&gt; vvp: dirty pages with pagevec&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: b5e6d9840b8515de5102a15a11bc66f95bf25a27&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="53139">LU-11290</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="55934">LU-12429</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="47927">LU-9906</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="54292">LU-11775</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzj33:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>