<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:24:34 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16162] ldiskfs: use low disk tracks for block allocation on empty or moderately full filesystems.</title>
                <link>https://jira.whamcloud.com/browse/LU-16162</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Disk performance degrades, when new blocks get allocated near the end of the disk,&lt;/p&gt;

&lt;p&gt;For example, the below are obdsurvey-results when mb_last_group is manually set to 0/75%/90% of max block group num before running the test:&lt;/p&gt;
&lt;blockquote&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;3949.61 (0) vs 3677.15 (75%) vs 3133.43 (90%)

[root@cslmo2305 ~]# rpm -qi lustre_ib | grep Version | dshbak -c
----------------
Version
----------------
2.15.0.3_rc2_cray_165_g3355f27
[root@cslmo2305 ~]# echo 0 &amp;gt; /proc/fs/ldiskfs/md*/mb_last_group
[root@cslmo2305 ~]# cat /proc/fs/ldiskfs/md*/mb_last_group
0
[root@cslmo2305 ~]# nobjlo=2 nobjhi=2 thrlo=1024 thrhi=1024 size=393216 rszlo=4096 rszhi=4096 tests_str=&quot;write read&quot; obdfilter-survey | egrep -v &quot;^done&quot; 2&amp;gt;/dev/null
Wed May 18 13:23:57 UTC 2022 Obdfilter-survey for case=disk from cslmo2305
ost  1 sz 402653184K rsz 4096K obj    2 thr 1024 write 3949.61 [1399.72, 4272.29] read 4451.99 [1679.81, 5986.40] 
/usr/bin/iokit-libecho: line 235: 69223 Killed                  remote_shell $host &quot;vmstat 5 &amp;gt;&amp;gt; $host_vmstatf&quot; &amp;amp;&amp;gt; /dev/null
[root@cslmo2305 ~]# cat /proc/fs/ldiskfs/md*/mb_last_group
3565
[root@cslmo2305 ~]# echo 0 &amp;gt; /proc/fs/ldiskfs/md*/mb_last_group
[root@cslmo2305 ~]# nobjlo=2 nobjhi=2 thrlo=1024 thrhi=1024 size=393216 rszlo=4096 rszhi=4096 tests_str=&quot;write read&quot; obdfilter-survey | egrep -v &quot;^done&quot; 2&amp;gt;/dev/null
Wed May 18 13:27:24 UTC 2022 Obdfilter-survey for case=disk from cslmo2305
ost  1 sz 402653184K rsz 4096K obj    2 thr 1024 write 3975.33 [1207.70, 4299.46] read 4517.36 [1623.78, 5675.05] 
/usr/bin/iokit-libecho: line 235: 76282 Killed                  remote_shell $host &quot;vmstat 5 &amp;gt;&amp;gt; $host_vmstatf&quot; &amp;amp;&amp;gt; /dev/null
[root@cslmo2305 ~]# cat /proc/fs/ldiskfs/md*/mb_last_group
3590
[root@cslmo2305 ~]# echo 1040830 &amp;gt; /proc/fs/ldiskfs/md*/mb_last_group
[root@cslmo2305 ~]# cat /proc/fs/ldiskfs/md*/mb_last_group
1040830
[root@cslmo2305 ~]# nobjlo=2 nobjhi=2 thrlo=1024 thrhi=1024 size=393216 rszlo=4096 rszhi=4096 tests_str=&quot;write read&quot; obdfilter-survey | egrep -v &quot;^done&quot; 2&amp;gt;/dev/null
Wed May 18 13:30:56 UTC 2022 Obdfilter-survey for case=disk from cslmo2305
ost  1 sz 402653184K rsz 4096K obj    2 thr 1024 write 3677.15 [2194.42, 3995.29] read 4819.83 [3596.32, 5391.15] 
/usr/bin/iokit-libecho: line 235: 82505 Killed                  remote_shell $host &quot;vmstat 5 &amp;gt;&amp;gt; $host_vmstatf&quot; &amp;amp;&amp;gt; /dev/null
[root@cslmo2305 ~]# cat /proc/fs/ldiskfs/md*/mb_last_group
1044021
[root@cslmo2305 ~]# echo 1040830 &amp;gt; /proc/fs/ldiskfs/md*/mb_last_group
[root@cslmo2305 ~]# cat /proc/fs/ldiskfs/md*/mb_last_group
1040830
[root@cslmo2305 ~]# nobjlo=2 nobjhi=2 thrlo=1024 thrhi=1024 size=393216 rszlo=4096 rszhi=4096 tests_str=&quot;write read&quot; obdfilter-survey | egrep -v &quot;^done&quot; 2&amp;gt;/dev/null
Wed May 18 13:34:34 UTC 2022 Obdfilter-survey for case=disk from cslmo2305
ost  1 sz 402653184K rsz 4096K obj    2 thr 1024 write 3666.51 [3231.40, 4047.68] read 4798.87 [3963.07, 5255.19] 
/usr/bin/iokit-libecho: line 235: 88607 Killed                  remote_shell $host &quot;vmstat 5 &amp;gt;&amp;gt; $host_vmstatf&quot; &amp;amp;&amp;gt; /dev/null
[root@cslmo2305 ~]# cat /proc/fs/ldiskfs/md*/mb_last_group
1044030
[root@cslmo2305 ~]# echo 1248996 &amp;gt; /proc/fs/ldiskfs/md
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/blockquote&gt;</description>
                <environment></environment>
        <key id="72375">LU-16162</key>
            <summary>ldiskfs: use low disk tracks for block allocation on empty or moderately full filesystems.</summary>
                <type id="4" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11310&amp;avatarType=issuetype">Improvement</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="zam">Alexander Zarochentsev</assignee>
                                    <reporter username="zam">Alexander Zarochentsev</reporter>
                        <labels>
                            <label>ldiskfs</label>
                    </labels>
                <created>Thu, 15 Sep 2022 14:11:54 +0000</created>
                <updated>Wed, 10 May 2023 22:57:26 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="346796" author="adilger" created="Thu, 15 Sep 2022 15:32:15 +0000"  >&lt;p&gt;I&apos;ve been thinking about this issue for some time already, and I think it makes sense to use the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14438&quot; title=&quot;backport ldiskfs mballoc patches&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14438&quot;&gt;LU-14438&lt;/a&gt; patches as a starting point for this. That patch provides an array of allocation groups sorted by size of free extent, and the array is used when searching for a new group for allocations. &lt;/p&gt;

&lt;p&gt;To provide the start/end segregation needed to isolate the slower tracks of the disk, a threshold could be set (eg. 80% of groups, or a specific group number), and this could be used to efficiently split the groups into two arrays. The &quot;fast&quot; array, for groups below the threshold, and the &quot;slow&quot; array for groups larger than the threshold. Allocations would prefer groups from the fast array if there are suitable free chunks, and only look for groups in the slow array if there were none in the fast array.  &lt;/p&gt;

&lt;p&gt;That would only be a small change to the mballoc code, as well as an O(1) change to the array insertion code to pick the correct array in which to insert each group. &lt;/p&gt;</comment>
                            <comment id="346799" author="adilger" created="Thu, 15 Sep 2022 15:39:41 +0000"  >&lt;p&gt;If there was some way for clients to specify QOS for files (&quot;&lt;tt&gt;lfs ladvise&lt;/tt&gt;&quot; or with &quot;&lt;tt&gt;lfs migrate&lt;/tt&gt;&quot;?), it would even be possible to have osd-ldiskfs allocate objects into the slow groups directly. &lt;/p&gt;</comment>
                            <comment id="346847" author="pjones" created="Thu, 15 Sep 2022 22:08:32 +0000"  >&lt;p&gt;&quot;Alexander Zarochentsev &amp;lt;alexander.zarochentsev@hpe.com&amp;gt;&quot; uploaded a new patch:  &lt;a href=&quot;https://review.whamcloud.com/48558&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/48558&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16162&quot; title=&quot;ldiskfs: use low disk tracks for block allocation on empty or moderately full filesystems.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16162&quot;&gt;LU-16162&lt;/a&gt; ldiskfs: keep low tracks allocated by mballoc&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 3&lt;br/&gt;
Commit: cfdf70dc5b3aad26ff746f20ee030c389f9a7715&lt;/p&gt;
</comment>
                            <comment id="346853" author="adilger" created="Fri, 16 Sep 2022 00:06:47 +0000"  >&lt;blockquote&gt;
&lt;p&gt;If there was some way for clients to specify QOS for files (&quot;lfs ladvise&quot; or with &quot;lfs migrate&quot;?), it would even be possible to have osd-ldiskfs allocate objects into the slow groups directly.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Shuichi had a very good use case for being able to write into the &quot;slow&quot; part of the filesystem.  When migrating data from an old filesystem to a new filesystem, the old data will be copied into the newly-formatted OSTs, and fill all of the low groups (high bandwidth), leaving the new data to use slower parts of the disk.  It would be useful to have some mechanism (ladvise process setting, environment variable, layout, or &lt;tt&gt;fcntl(F_SET_RW_HINT)&lt;/tt&gt;?) to force object allocation to e.g. the last 30% of groups for cases like this, so that the beginning of the filesystem remains available for new usage.&lt;/p&gt;</comment>
                            <comment id="347097" author="zam" created="Mon, 19 Sep 2022 16:50:23 +0000"  >&lt;p&gt;&amp;gt; if there was some way for clients to specify QOS for files (&quot;lfs ladvise&quot; or with &quot;lfs migrate&quot;?), it would even be possible to have osd-ldiskfs allocate objects into the slow groups directly.&lt;/p&gt;

&lt;p&gt;no, the patch only addresses a simple but annoying case when mb_last_group points to some high block group num but the fs still almost empty (due to repeatable write / delete usage pattern) also  &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15319&quot; title=&quot;Weird mballoc behaviour&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15319&quot;&gt;&lt;del&gt;LU-15319&lt;/del&gt;&lt;/a&gt; is about a weird mballoc optimization causing skipping of already initialized block groups , i.e. ldiskfs fs starts to write to not initialized part, after some iterations, fs users would notice an empty fs slowness.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="62900">LU-14438</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="67459">LU-15319</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i030an:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>