<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:13:36 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7982] Client side QoS based on jobid</title>
                <link>https://jira.whamcloud.com/browse/LU-7982</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Lustre has a server side QoS mechanism based on the NRS TBF policy (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3558&quot; title=&quot;NRS TBF policy for QoS purposes&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3558&quot;&gt;&lt;del&gt;LU-3558&lt;/del&gt;&lt;/a&gt;). The NRS TBF policy can enforce rate limits based on both NID rules and JobID rules. However, when using JobID-based TBF rules, if multiple jobs run on the same client, the RPC rates of those jobs affect each other. More precisely, a job with a high RPC rate limit may actually achieve a low RPC rate. The reason is that a job with a lower RPC rate limit can exhaust the client-wide max-in-flight-RPC limit or the max-cached-pages limit.&lt;/p&gt;

&lt;p&gt;In order to prevent this from happening, a client side mechanism needs to be added to make RPC sending at least fairer across all jobs.&lt;/p&gt;
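
&lt;p&gt;As context (a sketch, not from this ticket): the shared client-side limits in question can be inspected with lctl. The parameter names below are the standard OSC tunables; the procname_uid jobid scheme is an assumption that makes each program name its own job ID, matching the &quot;dd.0&quot; style job IDs in the logs below.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# Hedged sketch: these limits are per OSC and shared by every job on the
# client, which is why a slow job can starve a fast one.
lctl set_param jobid_var=procname_uid     # job IDs look like &quot;dd.0&quot;
lctl get_param osc.*.max_rpcs_in_flight   # shared in-flight RPC limit
lctl get_param osc.*.max_dirty_mb         # shared dirty page cache limit
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>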
                <environment></environment>
        <key id="35800">LU-7982</key>
            <summary>Client side QoS based on jobid</summary>
                <type id="2" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11311&amp;avatarType=issuetype">New Feature</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="lixi_wc">Li Xi</assignee>
                                    <reporter username="lixi">Li Xi</reporter>
                        <labels>
                            <label>cea</label>
                            <label>patch</label>
                    </labels>
                <created>Mon, 4 Apr 2016 19:30:16 +0000</created>
                <updated>Tue, 18 Sep 2018 03:29:56 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>11</watches>
                                                                            <comments>
                            <comment id="147760" author="gerrit" created="Mon, 4 Apr 2016 20:05:15 +0000"  >&lt;p&gt;Li Xi (lixi@ddn.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19317&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19317&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7982&quot; title=&quot;Client side QoS based on jobid&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7982&quot;&gt;LU-7982&lt;/a&gt; libcfs: memory allocation without CPT for binheap&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 48be3a4504794a7a315612abd7c7f501f8f75747&lt;/p&gt;</comment>
                            <comment id="147762" author="gerrit" created="Mon, 4 Apr 2016 20:09:48 +0000"  >&lt;p&gt;Li Xi (lixi@ddn.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19319&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19319&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7982&quot; title=&quot;Client side QoS based on jobid&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7982&quot;&gt;LU-7982&lt;/a&gt; nrs: Add client OSC side Qos support&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 780b81f5c1ca700c3a6f9d62024ba5b614ef62c7&lt;/p&gt;</comment>
                            <comment id="147793" author="pjones" created="Tue, 5 Apr 2016 05:45:22 +0000"  >&lt;p&gt;Emoly&lt;/p&gt;

&lt;p&gt;Could you please review these patches?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="149656" author="gerrit" created="Thu, 21 Apr 2016 12:11:25 +0000"  >&lt;p&gt;Li Xi (lixi@ddn.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19700&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19700&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7982&quot; title=&quot;Client side QoS based on jobid&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7982&quot;&gt;LU-7982&lt;/a&gt; osc: qos support for page cache usage&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 3d6fafd9336e6a235653c7469bf329906d40850c&lt;/p&gt;</comment>
                            <comment id="149663" author="lixi" created="Thu, 21 Apr 2016 13:44:18 +0000"  >&lt;p&gt;The patch 19319 is trying to solve the problem of in-flight-RPC limitation.&lt;br/&gt;
And the patch 19700 is trying to solve the problem of page cache limitation.&lt;/p&gt;

&lt;p&gt;Let&apos;s assume two processes with two different job IDs (Job1 and Job2) are&lt;br/&gt;
writing to the same OST, and JobID-based TBF rules are running on that OST. One&lt;br/&gt;
of the two job IDs has a larger RPC rate limit R1, and the other has a much&lt;br/&gt;
smaller limit R2. Both R1 and R2 are much smaller than the RPC rate that the OST&lt;br/&gt;
can provide. The expected behavior is that one of the processes&lt;br/&gt;
gets an RPC rate of R1, while the other gets an RPC rate of R2.&lt;/p&gt;

&lt;p&gt;However, the actual result is that both processes get an RPC rate of R2 because&lt;br/&gt;
of the in-flight-RPC limitation. Because R1 is much larger than R2, Job1&apos;s RPCs&lt;br/&gt;
finish much more quickly than Job2&apos;s. Eventually, almost all of the in-flight&lt;br/&gt;
RPCs of that OSC are Job2&apos;s RPCs, and whenever Job1 wants to send an RPC, it&lt;br/&gt;
needs to wait for the completion of one of Job2&apos;s RPCs. Patch 19319 tries to&lt;br/&gt;
solve this problem.&lt;/p&gt;

&lt;p&gt;With that patch, since the in-flight RPCs are balanced between jobs, the behavior of&lt;br/&gt;
some operations like direct I/O becomes as expected. However, the behavior&lt;br/&gt;
of cached writes still has a problem, because of the page cache limitation. Since&lt;br/&gt;
Job1&apos;s RPCs that flush dirty pages finish much sooner than Job2&apos;s, and&lt;br/&gt;
new idle pages are assigned to Job1 and Job2 randomly, eventually all the&lt;br/&gt;
pages belong to Job2. And thus, whenever Job1 wants to cache some pages,&lt;br/&gt;
it needs to wait for the completion of Job2&apos;s RPCs.&lt;/p&gt;

&lt;p&gt;Patch 19700 tries to solve this new problem. If both Job1 and Job2 are&lt;br/&gt;
waiting for idle pages, it assigns each new idle page to the job ID that is&lt;br/&gt;
currently using the fewest pages. I haven&apos;t tested it; it might work when each&lt;br/&gt;
job has multiple processes. However, if a job has only one process, there&lt;br/&gt;
might still be a problem.&lt;/p&gt;

&lt;p&gt;Let&apos;s assume all the pages are occupied. Most of the time, the processes of&lt;br/&gt;
both Job1 and Job2 are sleeping in the binary heap, waiting for more cache pages.&lt;br/&gt;
If one page is released by a finished RPC, it will be assigned to Job1. If&lt;br/&gt;
more than one page is released by a finished RPC, they will be assigned to&lt;br/&gt;
Job1 and then Job2. However, most of the time, a lot of pages are released by&lt;br/&gt;
one RPC. And that is the problem with patch 19700...&lt;/p&gt;
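
&lt;p&gt;To make the assignment policy concrete, here is a toy shell simulation (not&lt;br/&gt;
Lustre code) of &quot;grant each freed page to the job currently using the fewest&lt;br/&gt;
pages&quot;. The toy re-evaluates per page; as noted above, the real patch wakes&lt;br/&gt;
sleeping waiters from a binary heap, which is where the many-pages-per-RPC&lt;br/&gt;
problem comes in.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;#!/bin/bash
# Toy simulation of the page-grant policy described above (not the actual
# patch): every freed page goes to the job with the smallest current usage.
declare -A used=( [Job1]=0 [Job2]=0 )
grant_pages() {
        local n=$1 i
        for ((i = 0; i &amp;lt; n; i++)); do
                if (( ${used[Job1]} &amp;lt;= ${used[Job2]} )); then
                        used[Job1]=$(( ${used[Job1]} + 1 ))
                else
                        used[Job2]=$(( ${used[Job2]} + 1 ))
                fi
        done
}
grant_pages 256   # a single finished RPC can release many pages at once
echo &quot;Job1 uses ${used[Job1]} pages, Job2 uses ${used[Job2]} pages&quot;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;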
</comment>
                            <comment id="149802" author="gerrit" created="Fri, 22 Apr 2016 07:34:14 +0000"  >&lt;p&gt;Li Xi (lixi@ddn.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19729&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19729&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7982&quot; title=&quot;Client side QoS based on jobid&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7982&quot;&gt;LU-7982&lt;/a&gt; osc: qos support for page cache usage&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 795bfcfb5ee099f36e24e49ff24cca19c080bf8d&lt;/p&gt;</comment>
                            <comment id="149804" author="lixi" created="Fri, 22 Apr 2016 07:51:42 +0000"  >&lt;p&gt;The patch 19729 tries to solve the same problem of 19700 in a different way. And it&lt;br/&gt;
has much more complex design, maybe too complex. However, it is able to balance&lt;br/&gt;
page cache usage between job IDs.&lt;/p&gt;

&lt;p&gt;If the page cache usage usage is balance from the first begining, it will remain balanced&lt;br/&gt;
when all of thoese Job IDs has active I/Os:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297097
job_id: &quot;dd.0&quot;, used: 2731, max: 2731, idle time: 0
job_id: &quot;dd3.0&quot;, used: 2731, max: 2731, idle time: 0
job_id: &quot;dd2.0&quot;, used: 2730, max: 2730, idle time: 0
[root@server9-Centos6-vm01 qos]# cat parallel.sh 
#!/bin/bash
THREADS=1
rm /mnt/lustre/* -f
for THREAD in `seq $THREADS`; do
        FILE1=/mnt/lustre/file1_$THREAD
        FILE2=/mnt/lustre/file2_$THREAD
        FILE3=/mnt/lustre/file3_$THREAD
        dd if=/dev/zero of=$FILE1 bs=1048576 count=10000 &amp;amp;
        dd2 if=/dev/zero of=$FILE2 bs=1048576 count=10000 &amp;amp;
        dd3 if=/dev/zero of=$FILE3 bs=1048576 count=10000 &amp;amp;
done
[root@server9-Centos6-vm01 qos]# sh parallel.sh
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297229
job_id: &quot;dd.0&quot;, used: 0, max: 2731, idle time: 4297155
job_id: &quot;dd3.0&quot;, used: 0, max: 2731, idle time: 4297155
job_id: &quot;dd2.0&quot;, used: 0, max: 2730, idle time: 4297155
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And then, if only one job ID is active, it reclaims all of the page cache for itself:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@server9-Centos6-vm01 qos]# dd if=/dev/zero of=/mnt/lustre/file1 bs=1048576 count=10000
^C241+0 records in
241+0 records out
252706816 bytes (253 MB) copied, 6.35447 s, 39.8 MB/s

[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8185, current time: 4297294
job_id: &quot;dd.0&quot;, used: 0, max: 2746, idle time: 4297292
job_id: &quot;dd3.0&quot;, used: 0, max: 2716, idle time: 4297292
job_id: &quot;dd2.0&quot;, used: 0, max: 2723, idle time: 4297290
[root@server9-Centos6-vm01 qos]# dd if=/dev/zero of=/mnt/lustre/file1 bs=1048576 count=10000&amp;amp;
[1] 2282
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297303
job_id: &quot;dd.0&quot;, used: 2777, max: 2777, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 2700, idle time: 4297302
job_id: &quot;dd2.0&quot;, used: 0, max: 2715, idle time: 4297302
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297304
job_id: &quot;dd.0&quot;, used: 2777, max: 2777, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 2700, idle time: 4297302
job_id: &quot;dd2.0&quot;, used: 0, max: 2715, idle time: 4297302
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297305
job_id: &quot;dd.0&quot;, used: 2825, max: 2825, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 2668, idle time: 4297304
job_id: &quot;dd2.0&quot;, used: 0, max: 2699, idle time: 4297304
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297307
job_id: &quot;dd.0&quot;, used: 2921, max: 2921, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 2604, idle time: 4297306
job_id: &quot;dd2.0&quot;, used: 0, max: 2667, idle time: 4297306
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297309
job_id: &quot;dd.0&quot;, used: 3113, max: 3113, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 2476, idle time: 4297308
job_id: &quot;dd2.0&quot;, used: 0, max: 2603, idle time: 4297308
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297311
job_id: &quot;dd.0&quot;, used: 3497, max: 3497, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 2220, idle time: 4297310
job_id: &quot;dd2.0&quot;, used: 0, max: 2475, idle time: 4297310
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297312
job_id: &quot;dd.0&quot;, used: 4265, max: 4265, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 1708, idle time: 4297312
job_id: &quot;dd2.0&quot;, used: 0, max: 2219, idle time: 4297312
[root@server9-Centos6-vm01 qos]# 
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297315
job_id: &quot;dd.0&quot;, used: 5801, max: 5801, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 684, idle time: 4297314
job_id: &quot;dd2.0&quot;, used: 0, max: 1707, idle time: 4297314
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297316
job_id: &quot;dd.0&quot;, used: 5801, max: 5801, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 684, idle time: 4297314
job_id: &quot;dd2.0&quot;, used: 0, max: 1707, idle time: 4297314
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297317
job_id: &quot;dd.0&quot;, used: 7509, max: 7509, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 0, idle time: 0
job_id: &quot;dd2.0&quot;, used: 0, max: 683, idle time: 4297316
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297319
job_id: &quot;dd.0&quot;, used: 8192, max: 8192, idle time: 0
job_id: &quot;dd3.0&quot;, used: 0, max: 0, idle time: 0
job_id: &quot;dd2.0&quot;, used: 0, max: 0, idle time: 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And then, if all job IDs start I/O again, the page cache is slowly rebalanced:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@server9-Centos6-vm01 qos]# sh parallel.sh 
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297447
job_id: &quot;dd.0&quot;, used: 8063, max: 8063, idle time: 0
job_id: &quot;dd3.0&quot;, used: 65, max: 65, idle time: 0
job_id: &quot;dd2.0&quot;, used: 64, max: 64, idle time: 0
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297454
job_id: &quot;dd.0&quot;, used: 7791, max: 7791, idle time: 0
job_id: &quot;dd3.0&quot;, used: 201, max: 201, idle time: 0
job_id: &quot;dd2.0&quot;, used: 200, max: 200, idle time: 0
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297455
job_id: &quot;dd.0&quot;, used: 6400, max: 7728, idle time: 4297455
job_id: &quot;dd3.0&quot;, used: 232, max: 232, idle time: 0
job_id: &quot;dd2.0&quot;, used: 232, max: 232, idle time: 0
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297474
job_id: &quot;dd.0&quot;, used: 7161, max: 7161, idle time: 0
job_id: &quot;dd3.0&quot;, used: 516, max: 516, idle time: 0
job_id: &quot;dd2.0&quot;, used: 515, max: 515, idle time: 0
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297511
job_id: &quot;dd.0&quot;, used: 0, max: 6446, idle time: 4297503
job_id: &quot;dd3.0&quot;, used: 0, max: 872, idle time: 4297503
job_id: &quot;dd2.0&quot;, used: 0, max: 874, idle time: 4297504
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297566
job_id: &quot;dd.0&quot;, used: 5694, max: 5694, idle time: 0
job_id: &quot;dd3.0&quot;, used: 1249, max: 1249, idle time: 0
job_id: &quot;dd2.0&quot;, used: 1249, max: 1249, idle time: 0
[root@server9-Centos6-vm01 qos]# sh parallel.sh 
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297601
job_id: &quot;dd.0&quot;, used: 5306, max: 5306, idle time: 0
job_id: &quot;dd3.0&quot;, used: 1442, max: 1442, idle time: 0
job_id: &quot;dd2.0&quot;, used: 1444, max: 1444, idle time: 0
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297674
job_id: &quot;dd.0&quot;, used: 0, max: 4570, idle time: 4297652
job_id: &quot;dd3.0&quot;, used: 0, max: 1809, idle time: 4297653
job_id: &quot;dd2.0&quot;, used: 0, max: 1813, idle time: 4297653
[root@server9-Centos6-vm01 qos]# sh parallel.sh 
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297771
job_id: &quot;dd.0&quot;, used: 0, max: 3751, idle time: 4297753
job_id: &quot;dd3.0&quot;, used: 0, max: 2221, idle time: 4297753
job_id: &quot;dd2.0&quot;, used: 0, max: 2220, idle time: 4297753
[root@server9-Centos6-vm01 qos]# sh parallel.sh 
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297798
job_id: &quot;dd.0&quot;, used: 3719, max: 3719, idle time: 0
job_id: &quot;dd3.0&quot;, used: 2237, max: 2237, idle time: 0
job_id: &quot;dd2.0&quot;, used: 2236, max: 2236, idle time: 0
[root@server9-Centos6-vm01 qos]# sh parallel.sh 
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297870
job_id: &quot;dd.0&quot;, used: 2886, max: 2886, idle time: 0
job_id: &quot;dd3.0&quot;, used: 2653, max: 2653, idle time: 0
job_id: &quot;dd2.0&quot;, used: 2653, max: 2653, idle time: 0
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297885
job_id: &quot;dd.0&quot;, used: 2731, max: 2731, idle time: 0
job_id: &quot;dd3.0&quot;, used: 2731, max: 2731, idle time: 0
job_id: &quot;dd2.0&quot;, used: 2730, max: 2730, idle time: 0
[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff880109d20c00/osc_cache_class 
total: 8192, assigned: 8192, current time: 4297893
job_id: &quot;dd.0&quot;, used: 2730, max: 2730, idle time: 0
job_id: &quot;dd3.0&quot;, used: 2731, max: 2731, idle time: 0
job_id: &quot;dd2.0&quot;, used: 2731, max: 2731, idle time: 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;As you can see, the rebalancing process is very slow, because the busy job ID only spares one of its pages each time an RPC finishes.&lt;br/&gt;
This could be optimized in the future to speed up the rebalancing.&lt;/p&gt;
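
&lt;p&gt;(A simple way to watch the rebalancing is to poll the proc file from the logs above; the wildcard below is an assumption for a single-OSC setup:)&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# Hedged sketch: sample the per-job page accounting once per second while
# the dd jobs run, to watch the &quot;used&quot; columns converge across job IDs.
while true; do
        cat /proc/fs/lustre/osc/*/osc_cache_class
        sleep 1
done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>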
                            <comment id="149835" author="lixi" created="Fri, 22 Apr 2016 15:10:51 +0000"  >&lt;p&gt;With the updated version (patch set 2) of 19729, all busy job IDs will balance their page usages much much more quickly than&lt;br/&gt;
before. And that makes me more confident with this design.&lt;/p&gt;</comment>
                            <comment id="150452" author="gerrit" created="Thu, 28 Apr 2016 04:23:43 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/19317/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19317/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7982&quot; title=&quot;Client side QoS based on jobid&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7982&quot;&gt;LU-7982&lt;/a&gt; libcfs: memory allocation without CPT for binheap&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: cbe5b45a1d157c7345bd1352c257bee22ad8d085&lt;/p&gt;</comment>
                            <comment id="150659" author="gerrit" created="Sun, 1 May 2016 14:52:59 +0000"  >&lt;p&gt;Li Xi (lixi@ddn.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19896&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19896&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7982&quot; title=&quot;Client side QoS based on jobid&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7982&quot;&gt;LU-7982&lt;/a&gt; osc: qos support for in flight RPC slot usage&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: f67a81043213250ee818e7e6bfb920f8eaaba004&lt;/p&gt;</comment>
                            <comment id="150822" author="lixi" created="Tue, 3 May 2016 09:01:06 +0000"  >&lt;p&gt;We&apos;ve got encouraging results that the QoS patches finally work well. In order&lt;br/&gt;
to check the function, we need to use NRS TBF policy on OSS.&lt;/p&gt;
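
&lt;p&gt;For context, a sketch of the presumed test setup (assuming mydd and thdd are simply renamed copies of dd, so that the procname_uid jobid scheme gives each command a distinct job ID):&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# Hedged sketch, not from the ticket: give each &quot;job&quot; its own program
# name so that JobID-based TBF rules can tell them apart.
cp /bin/dd /usr/local/bin/mydd
cp /bin/dd /usr/local/bin/thdd
lctl set_param jobid_var=procname_uid   # job IDs become dd.0, mydd.0, thdd.0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;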

&lt;p&gt;The following are the results without the QoS patches:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;1. Run dd alone (NRS policy: fifo)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 13.479 s, 79.7 MB/s

2. Run dd/mydd at the same time on the same client (NRS policy: fifo)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 24.6039 s, 43.6 MB/s
[root@QYJ Desktop]# mydd if=/dev/zero of=/mnt/lustre/t2 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 25.3535 s, 42.4 MB/s

3. Run dd/mydd/thdd at the same time on the same client (NRS policy: fifo)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 31.3823 s, 34.2 MB/s
[root@QYJ Desktop]# mydd if=/dev/zero of=/mnt/lustre/t2 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 32.4403 s, 33.1 MB/s
[root@QYJ Desktop]# thdd if=/dev/zero of=/mnt/lustre/t3 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 34.4943 s, 31.1 MB/s

4. Change the NRS policy to TBF jobid
[root@QYJ tests]# lctl set_param ost.OSS.ost_io.nrs_policies=&quot;tbf jobid&quot;
ost.OSS.ost_io.nrs_policies=tbf jobid
[root@QYJ tests]# lctl set_param ost.OSS.ost_io.nrs_tbf_rule=&quot;start A {dd.0} 20&quot; 
ost.OSS.ost_io.nrs_tbf_rule=start A {dd.0} 20
[root@QYJ tests]# lctl set_param ost.OSS.ost_io.nrs_tbf_rule=&quot;start B {mydd.0} 10&quot;
ost.OSS.ost_io.nrs_tbf_rule=start B {mydd.0} 10
[root@QYJ tests]# lctl set_param ost.OSS.ost_io.nrs_tbf_rule=&quot;start C {thdd.0} 5&quot;
ost.OSS.ost_io.nrs_tbf_rule=start C {thdd.0} 5

5. Run dd/mydd/thdd alone (NRS policy: TBF jobid)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 59.2141 s, 18.1 MB/s
[root@QYJ Desktop]# mydd if=/dev/zero of=/mnt/lustre/t2 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 103.855 s, 10.3 MB/s
[root@QYJ Desktop]# thdd if=/dev/zero of=/mnt/lustre/t3 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 199.384 s, 5.4 MB/s

6. Run dd/mydd at the same time on the same client (NRS policy: TBF jobid)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 118.265 s, 9.1 MB/s
[root@QYJ Desktop]# mydd if=/dev/zero of=/mnt/lustre/t2 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 120.273 s, 8.9 MB/s

7. Run dd/mydd/thdd at the same time on the same client (NRS policy: TBF jobid)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 198.492 s, 5.4 MB/s
[root@QYJ Desktop]# mydd if=/dev/zero of=/mnt/lustre/t2 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 204.857 s, 5.2 MB/s
[root@QYJ Desktop]# thdd if=/dev/zero of=/mnt/lustre/t3 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 210.522 s, 5.1 MB/s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;As we can see from the results, the job IDs with higher RPC rate limits are&lt;br/&gt;
dragged down by the job ID with the lower RPC rate limit.&lt;/p&gt;

&lt;p&gt;The following are the results with the QoS patches (19729 + 19896):&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;1. Run dd alone (NRS policy: fifo)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 13.572 s, 79.1 MB/s

2. Run dd/mydd at the same time on the same client (NRS policy: fifo)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 24.0809 s, 44.6 MB/s
[root@QYJ Desktop]# mydd if=/dev/zero of=/mnt/lustre/t2 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 24.0959 s, 44.6 MB/s

3. Change the NRS policy to TBF jobid
[root@QYJ tests]# lctl set_param ost.OSS.ost_io.nrs_policies=&quot;tbf jobid&quot;
ost.OSS.ost_io.nrs_policies=tbf jobid
[root@QYJ tests]# lctl set_param ost.OSS.ost_io.nrs_tbf_rule=&quot;start A {dd.0} 20&quot; 
ost.OSS.ost_io.nrs_tbf_rule=start A {dd.0} 20
[root@QYJ tests]# lctl set_param ost.OSS.ost_io.nrs_tbf_rule=&quot;start B {mydd.0} 10&quot;
ost.OSS.ost_io.nrs_tbf_rule=start B {mydd.0} 10
[root@QYJ tests]# lctl set_param ost.OSS.ost_io.nrs_tbf_rule=&quot;start C {thdd.0} 5&quot;
ost.OSS.ost_io.nrs_tbf_rule=start C {thdd.0} 5

4. Run dd/mydd/thdd alone (NRS policy: TBF jobid)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 58.6623 s, 18.3 MB/s
[root@QYJ Desktop]# mydd if=/dev/zero of=/mnt/lustre/t2 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 103.291 s, 10.4 MB/s
[root@QYJ Desktop]# thdd if=/dev/zero of=/mnt/lustre/t3 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 198.988 s, 5.4 MB/s

5. Run dd/mydd at the same time on the same client (NRS policy: TBF jobid)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 64.5303 s, 16.6 MB/s
[root@QYJ Desktop]# mydd if=/dev/zero of=/mnt/lustre/t2 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 114.617 s, 9.4 MB/s

6. Run dd/mydd/thdd at the same time on the same client (NRS policy: TBF jobid)
[root@QYJ home]# dd if=/dev/zero of=/mnt/lustre/t1 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 68.6446 s, 15.6 MB/s
[root@QYJ Desktop]# mydd if=/dev/zero of=/mnt/lustre/t2 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 123.731 s, 8.7 MB/s
[root@QYJ Desktop]# thdd if=/dev/zero of=/mnt/lustre/t3 bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 219.711 s, 4.9 MB/s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;As we can see from the results, the job IDs with different RPC rate limits got the&lt;br/&gt;
expected RPC rates, and they didn&apos;t affect each other.&lt;/p&gt;

&lt;p&gt;Also, when we run dd/dd2/dd3 on the same client, we can monitor the page&lt;br/&gt;
usage as well as the in-flight RPCs used by each job ID. The following is the&lt;br/&gt;
result. As we can see, the page cache and in-flight RPCs are balanced between&lt;br/&gt;
job IDs.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@server9-Centos6-vm01 qos]# cat /proc/fs/lustre/osc/lustre-OST0000-osc-ffff88010b996400/osc_cache_class 
total: 8192, assigned: 8192, current time: 7514054940465(ns), reclaim time: 0, reclaim interval: 1000000000, in flight write RPC: 10, in flight read RPC: 0
job_id: &quot;dd.0&quot;, used: 2731, max: 2731, reclaim time: 0(ns), in flight write RPC: 3, in flight read RPC: 0
job_id: &quot;dd2.0&quot;, used: 2731, max: 2731, reclaim time: 0(ns), in flight write RPC: 3, in flight read RPC: 0
job_id: &quot;dd3.0&quot;, used: 2730, max: 2730, reclaim time: 0(ns), in flight write RPC: 4, in flight read RPC: 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="152374" author="lixi" created="Mon, 16 May 2016 03:00:55 +0000"  >&lt;p&gt;Anybody has some time to review these two patches?&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/19729/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/19729/&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/19896/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/19896/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;We are also going to work on cgroup support. The current QoS is based on JobID. Since the cgroup path of a task can be obtained via task_cgroup_path(), we should be able to add cgroup support for QoS easily, like what we did with NRS TBF JobID/NID rules.&lt;/p&gt;</comment>
                            <comment id="152378" author="lixi" created="Mon, 16 May 2016 03:15:01 +0000"  >&lt;p&gt;I am wondering whether we could add cgroup support for JobID. For example, if obd_jobid_var is &quot;cgroup_path&quot;, we use the path of cgroup as JobID. This prevents duplicated codes, and also enable us to implement  cgroup support for QoS on the server side in the future. &lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 16 May 2016 19:30:16 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzy6qn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 4 Apr 2016 19:30:16 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>