<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:52:57 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5608] Performance regression of removal operation with mdtest stride option</title>
                <link>https://jira.whamcloud.com/browse/LU-5608</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While comparing Lustre 1.8 series clients with the latest master release clients (the servers are running the same 2.5 series in both cases), we found there is a big file removal regression.&lt;/p&gt;

&lt;p&gt;Testing command is:&lt;/p&gt;

&lt;ol&gt;
	&lt;li&gt;mpirun -bind-to core:overload-allowed --map-by ppr:32:node --allow-run-as-root -np 512 -hostfile ./hostfile.32 ./mdtest -n 2000 -i 3 -p 10 -u -d /lustre_{0-31}/mdtest.out -F -N 32&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;File removal performance with the 1.8 client versus the master client is:&lt;br/&gt;
              61476.175 op/seconds  VS   39640.455 op/seconds.&lt;/p&gt;

&lt;p&gt;Big regression, isn&apos;t it?&lt;/p&gt;

&lt;p&gt;Note that we need to use the &apos;-N&apos; option for mdtest; the problem seems to be reproducible only with multiple clients. The attachment is the modified mdtest source code, which could help reproduce this problem. &lt;/p&gt;</description>
                <environment></environment>
        <key id="26491">LU-5608</key>
            <summary>Performance regression of removal operation with mdtest stride option</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="4">Incomplete</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="wangshilong">Wang Shilong</reporter>
                        <labels>
                    </labels>
                <created>Thu, 11 Sep 2014 10:41:23 +0000</created>
                <updated>Wed, 13 Oct 2021 03:14:47 +0000</updated>
                            <resolved>Wed, 13 Oct 2021 03:14:47 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="93856" author="ihara" created="Fri, 12 Sep 2014 16:12:24 +0000"  >&lt;p&gt;Here are benchmark results with the master branch and 1.8.9 on the clients. The server is running lustre-2.5.&lt;br/&gt;
mdtest supports a stride option (-N &amp;lt;n&amp;gt;), which can be used to avoid cached locks for created files. This regression happens when the &quot;-N&quot; option is enabled on lustre-2.6 clients; the performance drops by more than 45% compared to the lustre-1.8 client.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;32 clients, 64 processes
No stride
# mdtest -n 16384 -i 3 -p 10 -d /lustre_0/mdtest.out -F -u 

Stride=2, because, two mdtest threads are running on same client
# mdtest -n 16384 -i 3 -p 10 -d /lustre_0/mdtest.out -F -u -N 2 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;No Stride&lt;/p&gt;
&lt;div class=&apos;table-wrap&apos;&gt;
&lt;table class=&apos;confluenceTable&apos;&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&amp;nbsp;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Creation&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Stat&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Read &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;File removal&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;1.8.9 client &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 90548 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;224747&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;166181 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;102922&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2.6.62 client&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 83073&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;195469&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 128705 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;102793        &lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;


&lt;p&gt;Stride=2&lt;/p&gt;
&lt;div class=&apos;table-wrap&apos;&gt;
&lt;table class=&apos;confluenceTable&apos;&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&amp;nbsp;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Creation&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Stat&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Read &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;File removal&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;1.8.9 client &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;   83908&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 224870  &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;162568        &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 51753&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2.6.62 client&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 87455&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;205767&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 156613 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&lt;font color=&quot;red&quot;&gt;33621&lt;/font&gt;        &lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;
</comment>
                            <comment id="93880" author="spitzcor" created="Fri, 12 Sep 2014 18:45:37 +0000"  >&lt;p&gt;This seems to be related (or a duplicate) of &lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-1167&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-1167&lt;/a&gt; and &lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-3308&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-3308&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="93908" author="ihara" created="Sat, 13 Sep 2014 00:43:15 +0000"  >&lt;p&gt;I don&apos;t know what type of metadata workload &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1167&quot; title=&quot;Poor mdtest unlink performance with multiple processes per node&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1167&quot;&gt;LU-1167&lt;/a&gt; and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3308&quot; title=&quot;large readdir chunk size slows unlink/&amp;quot;rm -r&amp;quot; performance&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3308&quot;&gt;LU-3308&lt;/a&gt; did, but I think this is different issue.&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5608&quot; title=&quot;Performance regression of removal operation with mdtest stride option&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5608&quot;&gt;&lt;del&gt;LU-5608&lt;/del&gt;&lt;/a&gt; seems to be related to the layout lock, which was not yet introduced in lustre-2.1 or 2.2. &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1167&quot; title=&quot;Poor mdtest unlink performance with multiple processes per node&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1167&quot;&gt;LU-1167&lt;/a&gt; and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3308&quot; title=&quot;large readdir chunk size slows unlink/&amp;quot;rm -r&amp;quot; performance&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3308&quot;&gt;LU-3308&lt;/a&gt; didn&apos;t mention it because the layout lock was not available at that time.&lt;br/&gt;
I will post more benchmark results to confirm that our assumption is correct.&lt;/p&gt;</comment>
                            <comment id="93915" author="ihara" created="Sat, 13 Sep 2014 02:08:49 +0000"  >&lt;p&gt;lustre-1.8.9 doesn&apos;t have the layout lock. So, to check whether the layout lock might be related, I applied the following patch to force-disable the layout lock on the 2.6.52 client.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Index: lustre-release.git/lustre/llite/llite_lib.c
===================================================================
--- lustre-release.git.orig/lustre/llite/llite_lib.c
+++ lustre-release.git/lustre/llite/llite_lib.c
@@ -211,7 +211,7 @@ &lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; client_common_fill_super(stru
                                   OBD_CONNECT_FULL20   | OBD_CONNECT_64BITHASH|
 				  OBD_CONNECT_EINPROGRESS |
 				  OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
-				  OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS |
+				  OBD_CONNECT_PINGLESS |
 				  OBD_CONNECT_MAX_EASIZE |
 				  OBD_CONNECT_FLOCK_DEAD |
 				  OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
@@ -416,7 +416,6 @@ &lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; client_common_fill_super(stru
                                   OBD_CONNECT_MAXBYTES |
 				  OBD_CONNECT_EINPROGRESS |
 				  OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
-				  OBD_CONNECT_LAYOUTLOCK |
 				  OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK;
 
         &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (sbi-&amp;gt;ll_flags &amp;amp; LL_SBI_SOM_PREVIEW)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Here is test results. 32 clients, 64 process, 1M files for creation/stats/removal.&lt;/p&gt;

&lt;p&gt;No Stride&lt;/p&gt;
&lt;div class=&apos;table-wrap&apos;&gt;
&lt;table class=&apos;confluenceTable&apos;&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&amp;nbsp;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Creation&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Stat&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Read &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;File removal&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;1.8.9 client &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 90548 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;224747&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;166181 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;102922&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2.6.62 client&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 83073&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;195469&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 128705 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;102793        &lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;patched 2.6.62 client&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 89381&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;186802&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 120346 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&lt;font color=&quot;red&quot;&gt;83731&lt;/font&gt;         &lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;


&lt;p&gt;Stride=2&lt;/p&gt;
&lt;div class=&apos;table-wrap&apos;&gt;
&lt;table class=&apos;confluenceTable&apos;&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&amp;nbsp;&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Creation&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Stat&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; File Read &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;File removal&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;1.8.9 client &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;   83908&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 224870  &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;162568        &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 51753&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2.6.62 client&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 87455&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;205767&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 156613 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&lt;font color=&quot;red&quot;&gt;33621&lt;/font&gt;        &lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;patched 2.6.62 client&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 89275&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;&lt;font color=&quot;red&quot;&gt;182131&lt;/font&gt; &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt; 153786 &lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;49672       &lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;


&lt;p&gt;With stride enabled, &quot;File removal&quot; performance improved significantly and is close to lustre-1.8.9&apos;s numbers.&lt;br/&gt;
However, without stride, &quot;File removal&quot; performance dropped, and &quot;File stat&quot; performance also dropped when stride is enabled.&lt;/p&gt;</comment>
                            <comment id="93937" author="pjones" created="Sat, 13 Sep 2014 15:39:59 +0000"  >&lt;p&gt;Lai&lt;/p&gt;

&lt;p&gt;Could you please comment?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="94372" author="laisiyao" created="Thu, 18 Sep 2014 13:10:17 +0000"  >&lt;p&gt;This looks to be caused by statahead, because for mdtest stride option, statahead won&apos;t help, and cause overhead. And in current statahead implementation, each stat will try statahead, though it will fail because the stat entry is not first directory entry, which will cause more overhead. Hopefully &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3270&quot; title=&quot;ptlrpcd strnlen crash trying to log a message&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3270&quot;&gt;&lt;del&gt;LU-3270&lt;/del&gt;&lt;/a&gt; can help this issue, because a patch for that will disable statahead upon previous statahead failure.&lt;/p&gt;

&lt;p&gt;Could you disable statahead on master client, and run this test again?&lt;/p&gt;

&lt;p&gt;In the mean time, I&apos;ll do this test against &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3270&quot; title=&quot;ptlrpcd strnlen crash trying to log a message&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3270&quot;&gt;&lt;del&gt;LU-3270&lt;/del&gt;&lt;/a&gt; code to verify also.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="15726" name="mdtest-HEAD-f2bf8ac.tar" size="184320" author="wangshilong" created="Thu, 11 Sep 2014 10:41:24 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Thu, 18 Sep 2014 10:41:23 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwvzz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15687</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Thu, 11 Sep 2014 10:41:23 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>