<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:00:12 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13312] Optimized RA for stride read under memory pressure</title>
                <link>https://jira.whamcloud.com/browse/LU-13312</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12518&quot; title=&quot;improve Lustre unaligned IO read performances&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12518&quot;&gt;&lt;del&gt;LU-12518&lt;/del&gt;&lt;/a&gt; introduced a new RA that supports page-unaligned stride IO and significantly improved performance (e.g. IO500 IOR_hard_read). However, it can still be optimized. The current patch sometimes doesn&apos;t work well, apparently under memory pressure, but performance is back after dropping page caches before read. Here are a reproducer and results.&lt;/p&gt;

&lt;p&gt;4 x client(1 x Gold 5218, 96GB RAM)&lt;br/&gt;
 segment=400000 (~300GB per node)&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# mpirun -np 64 ior -w -s 400000 -a POSIX -i 1 -C -Q 1 -g -G 27 -k -e -t 47008 -b 47008 -o /fast/dir/file -O stoneWallingStatusFile=/fast/dir/stonewall -O stoneWallingWearOut=1 -D 300

# mpirun -np 64 ior -r -s 400000 -a POSIX -i 1 -C -Q 1 -g -G 27 -k -e -t 47008 -b 47008 -o /fast/dir/file -O stoneWallingStatusFile=/fast/dir/stonewall -O stoneWallingWearOut=1 -D 300
 
Max Read:  5087.32 MiB/sec (5334.44 MB/sec)

One of client&apos;s RA stat
# lctl get_param llite.*.read_ahead_stats
llite.fast-ffff99878133d000.read_ahead_stats=
snapshot_time             1582946538.113259755 secs.nsecs
hits                      72125088 samples [pages]
misses                    1686810 samples [pages]
readpage not consecutive  6400000 samples [pages]
miss inside window        3011 samples [pages]
failed grab_cache_page    2945424 samples [pages]
read but discarded        35565 samples [pages]
zero size window          100245 samples [pages]
failed to reach end       73663094 samples [pages]
failed to fast read       6396933 samples [pages]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;After dropping pagecache on clients before read.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# clush -a &quot;echo 3 &amp;gt; /proc/sys/vm/drop_caches &quot;
# mpirun -np 64 ior -r -s 400000 -a POSIX -i 1 -C -Q 1 -g -G 27 -k -e -t 47008 -b 47008 -o /fast/dir/file -O stoneWallingStatusFile=/fast/dir/stonewall -O stoneWallingWearOut=1 -D 300

Max Read:  16244.62 MiB/sec (17033.72 MB/sec)

Client&apos;s RA stat
# lctl get_param llite.*.read_ahead_stats
llite.fast-ffff99878133d000.read_ahead_stats=
snapshot_time             1582947544.040550353 secs.nsecs
hits                      73799940 samples [pages]
misses                    63 samples [pages]
readpage not consecutive  6400000 samples [pages]
failed grab_cache_page    2654231 samples [pages]
read but discarded        1 samples [pages]
zero size window          500 samples [pages]
failed to reach end       402367 samples [pages]
failed to fast read       35075 samples [pages]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;</description>
                <environment>master</environment>
        <key id="58223">LU-13312</key>
            <summary>Optimized RA for stride read under memory pressure</summary>
                <type id="4" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11310&amp;avatarType=issuetype">Improvement</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="wshilong">Wang Shilong</assignee>
                                    <reporter username="sihara">Shuichi Ihara</reporter>
                        <labels>
                    </labels>
                <created>Sat, 29 Feb 2020 04:01:24 +0000</created>
                <updated>Wed, 17 Feb 2021 22:29:50 +0000</updated>
                            <resolved>Wed, 17 Feb 2021 22:29:50 +0000</resolved>
                                                    <fixVersion>Lustre 2.14.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="264286" author="sihara" created="Sat, 29 Feb 2020 04:03:56 +0000"  >&lt;p&gt;attached is debug=reada in a bad performance case.&lt;/p&gt;</comment>
                            <comment id="264287" author="gerrit" created="Sat, 29 Feb 2020 05:20:12 +0000"  >&lt;p&gt;Wang Shilong (wshilong@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/37761&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37761&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13312&quot; title=&quot;Optimized RA for stride read under memory pressure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13312&quot;&gt;&lt;del&gt;LU-13312&lt;/del&gt;&lt;/a&gt; llite: improve RA under memory pressure&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: a8c77b91b3148aa0dbb9e494f2b771dd38910fd9&lt;/p&gt;</comment>
                            <comment id="264288" author="wshilong" created="Sat, 29 Feb 2020 07:24:24 +0000"  >&lt;p&gt;To be clear for the ticket, there might be several problems here:&lt;/p&gt;

&lt;p&gt;1) This behavior is not actually a regression from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12518&quot; title=&quot;improve Lustre unaligned IO read performances&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12518&quot;&gt;&lt;del&gt;LU-12518&lt;/del&gt;&lt;/a&gt;; the memory allocation policy will always have this problem with or without &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12518&quot; title=&quot;improve Lustre unaligned IO read performances&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12518&quot;&gt;&lt;del&gt;LU-12518&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;2) there might be two main reasons that make RA stopped currently:&lt;br/&gt;
     2.1 memory pressure which need be reclaimed some memory from FS.&lt;br/&gt;
     2.2 some lock contention with write from other clients, this might be especially problem like &lt;br/&gt;
           IO500 hard mode, as it will generate many PW locks from different kinds of clients for &lt;br/&gt;
            writing, and then start read mode; this might potentially make RA not work well if&lt;br/&gt;
          PR locks could not be grabbed further as a lock contention detection(here).&lt;/p&gt;

&lt;p&gt;We should isolate problems, at least focus problem 2.1 in this ticket.&lt;/p&gt;

</comment>
                            <comment id="264289" author="wshilong" created="Sat, 29 Feb 2020 09:06:07 +0000"  >&lt;p&gt;After checking the debug logs, there are many error logs like:&lt;/p&gt;

&lt;p&gt;00020000:00400000:5.0:1582946316.433652:0:18406:0:(lov_io.c:1049:lov_io_read_ahead()) &lt;span class=&quot;error&quot;&gt;&amp;#91;0x200000404:0x8:0x0&amp;#93;&lt;/span&gt; cra_end = 0, stripes = 240, rc = -61&lt;/p&gt;

&lt;p&gt;-61 is ENODATA which returned by osc_io_read_ahead(), it means readahead could not grab locks ahead, this might be related to&lt;br/&gt;
your &quot;lru_max_age=100&quot; &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=sihara&quot; class=&quot;user-hover&quot; rel=&quot;sihara&quot;&gt;sihara&lt;/a&gt;?&lt;/p&gt;

&lt;p&gt;So that explain ldlm.namespaces.*.lru_size=clear before reading testing start, it guarantee there is no PW locks from other clients and PR locks could be grabbed very aggressively which makes our readahead go very well.&lt;/p&gt;

</comment>
                            <comment id="264290" author="wshilong" created="Sat, 29 Feb 2020 09:08:42 +0000"  >&lt;p&gt;I guess the reason you set lru_max_age=100 is that, after writing, lock cancel could take a bit of time if there are too many PW locks cached in memory.&lt;/p&gt;</comment>
                            <comment id="264291" author="gerrit" created="Sat, 29 Feb 2020 09:32:42 +0000"  >&lt;p&gt;Wang Shilong (wshilong@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/37762&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37762&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13312&quot; title=&quot;Optimized RA for stride read under memory pressure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13312&quot;&gt;&lt;del&gt;LU-13312&lt;/del&gt;&lt;/a&gt; ldlm: fix to stop iterating tree early in ldlm_kms_shift_cb()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: c3b211134e5021f52db80d00613de8699f805f7c&lt;/p&gt;</comment>
                            <comment id="264292" author="wshilong" created="Sat, 29 Feb 2020 09:34:32 +0000"  >&lt;p&gt;Regarding to lock cancel problem, i think we talked somewhere, but finally not get a chance to push a known issue there, let&apos;s push it this ticket.&lt;/p&gt;</comment>
                            <comment id="264297" author="sihara" created="Sun, 1 Mar 2020 00:34:12 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;-61 is ENODATA which returned by osc_io_read_ahead(), it means readahead could not grab locks ahead, this might be related to
your &quot;lru_max_age=100&quot; Shuichi Ihara?
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Nope, I didn&apos;t change lru_max_age when I got this log.&lt;/p&gt;</comment>
                            <comment id="264298" author="sihara" created="Sun, 1 Mar 2020 00:43:23 +0000"  >&lt;p&gt;I&apos;ve also confirmed that canceling all locks before the read always helped a lot, regardless of whether the client is under memory pressure or not.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# mpirun -np 64 ior -w -s 400000 -a POSIX -i 1 -C -Q 1 -g -G 27 -k -e -t 47008 -b 47008 -o /fast/dir/file -O stoneWallingStatusFile=/fast/dir/stonewall -O stoneWallingWearOut=1 -D 300

# clush -w ec[01-04] lctl set_param  ldlm.namespaces.*.lru_size=clear &amp;gt; /dev/null

# mpirun -np 64 ior -r -s 400000 -a POSIX -i 1 -C -Q 1 -g -G 27 -k -e -t 47008 -b 47008 -o /fast/dir/file -O stoneWallingStatusFile=/fast/dir/stonewall -O stoneWallingWearOut=1 -D 300

Max Read:  22606.54 MiB/sec (23704.67 MB/sec)

Without canceling locks before read
Max Read:  4241.10 MiB/sec (4447.12 MB/sec)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="265946" author="gerrit" created="Tue, 24 Mar 2020 05:20:41 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/37762/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37762/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13312&quot; title=&quot;Optimized RA for stride read under memory pressure&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13312&quot;&gt;&lt;del&gt;LU-13312&lt;/del&gt;&lt;/a&gt; ldlm: fix to stop iterating tree early in ldlm_kms_shift_cb()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: b28b3bd9094ee7be8e3c11a531383246a71d5dec&lt;/p&gt;</comment>
                            <comment id="265959" author="wshilong" created="Tue, 24 Mar 2020 08:18:18 +0000"  >&lt;p&gt;This is not actually a memory problem.&lt;/p&gt;</comment>
                            <comment id="266015" author="spitzcor" created="Tue, 24 Mar 2020 16:59:13 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=wshilong&quot; class=&quot;user-hover&quot; rel=&quot;wshilong&quot;&gt;wshilong&lt;/a&gt;, you closed this, but &lt;a href=&quot;https://review.whamcloud.com/#/c/37761/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/37761/&lt;/a&gt; is still pending for this LU.  Do you intend to abandon or re-target that patch?  Or, shall we re-open this ticket?&lt;/p&gt;</comment>
                            <comment id="266041" author="wshilong" created="Wed, 25 Mar 2020 00:20:20 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=spitzcor&quot; class=&quot;user-hover&quot; rel=&quot;spitzcor&quot;&gt;spitzcor&lt;/a&gt; i&apos;ll abandon that patch.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="34368" name="lctl-dk-ra.txt.gz" size="9281193" author="sihara" created="Sat, 29 Feb 2020 04:03:53 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00ukv:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>