<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:07:38 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7293] DNE2 performance analysis</title>
                <link>https://jira.whamcloud.com/browse/LU-7293</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;I did a detailed study of the client scaling behavior for 10k and 100k files per directory using 1,2,4, and 8 MDS servers each having one MDT. I also attempted to collect data for 16 MDS servers but the results were so bad I didn&apos;t bother to finish collecting them since it would take several months to finish the 16 node case.&lt;/p&gt;</description>
                <environment>DNE2 system with up to 16 MDS servers. Uses up to 400 client nodes spread across 20 physical nodes. All the results are based on mdtest 1.9.3 runs.</environment>
        <key id="32633">LU-7293</key>
            <summary>DNE2 performance analysis</summary>
                <type id="4" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11310&amp;avatarType=issuetype">Improvement</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="di.wang">Di Wang</assignee>
                                    <reporter username="simmonsja">James A Simmons</reporter>
                        <labels>
                            <label>dne2</label>
                            <label>dne3</label>
                    </labels>
                <created>Tue, 13 Oct 2015 22:45:13 +0000</created>
                <updated>Sat, 12 Jan 2019 04:02:24 +0000</updated>
                            <resolved>Sat, 12 Jan 2019 04:02:24 +0000</resolved>
                                    <version>Lustre 2.9.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>14</watches>
                                                                            <comments>
                            <comment id="130318" author="di.wang" created="Tue, 13 Oct 2015 23:15:26 +0000"  >&lt;p&gt;James, Could you please tell me more of your test? commands?  &quot;MDS striping 8&quot; means the directory stripe_count = 8 ?  Just curious, you did not disable quota in your test right? Thanks&lt;/p&gt;</comment>
                            <comment id="130319" author="simmonsja" created="Tue, 13 Oct 2015 23:31:34 +0000"  >&lt;p&gt;I attached my job script I used. You just need to replace apron with mpirun with the correct node count and thread count. As for quotas I haven&apos;t touch that setting. In the past it was off by default. Is this not the case anymore? Yes the MDS striping is the lfs setdirstripe -c value I used.&lt;/p&gt;</comment>
                            <comment id="130322" author="di.wang" created="Wed, 14 Oct 2015 01:50:34 +0000"  >&lt;p&gt;I think quota is enable by default since 2.4. To disable quota you have to &quot;tune2fs -O ^quota&quot; after reformat. But since there are only 1 MDT per MDS, quota is probably irrelevant. Thanks I will check the script. &lt;/p&gt;</comment>
                            <comment id="130328" author="di.wang" created="Wed, 14 Oct 2015 04:11:53 +0000"  >&lt;p&gt;In mdtest-scale.pbs&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;!/bin/bash
#PBS -l nodes=20
#PBS -l walltime=24:00:00
#PBS -N results-mdtest-scale
#PBS -j oe

MOUNT=sultan
OSTCOUNT=$(lctl get_param -n lov.$MOUNT-clilov*.numobd)
ITER=5

PBS_JOBID=&quot;dne2_8_mds&quot;
BINDIR=/lustre/$MOUNT/stf008/scratch/$USER
OUTDIR=$BINDIR/${PBS_JOBID}_md_test
[ -e $OUTDIR ] || {
        mkdir -p $OUTDIR
        lfs setstripe -c $OSTCOUNT $OUTDIR
}
cd $BINDIR
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It seems this script is for testing OST stripes? you probably post the wrong script? thanks&lt;/p&gt;</comment>
                            <comment id="130387" author="simmonsja" created="Wed, 14 Oct 2015 15:59:53 +0000"  >&lt;p&gt;The script always sets the test directories so all created files are striped across all the OSTs. I do this with all my tests so when I move to our large stripe test setup I can test to ensure 1008 OST stripe files work as well. It also ensure that my fail over testing will always cover active servers on the OSS side. &lt;/p&gt;</comment>
                            <comment id="130396" author="di.wang" created="Wed, 14 Oct 2015 17:16:52 +0000"  >&lt;p&gt;oh, I want to know how the directories are striped among MDT?  Did you set default dir stripes (or /lustre/$MOUNT/stf008/scratch/$USER  ?  If you did, then it probably explain why directory creation is much slower for multiple MDT, because all of directories created here are striped directory.  Anyway, could you please tell me how BINDIR/OUTDIR are striped here?  Thanks.&lt;/p&gt;</comment>
                            <comment id="130413" author="simmonsja" created="Wed, 14 Oct 2015 18:09:59 +0000"  >&lt;p&gt;I precreated each striped/split directory before each run of the mdtest-scale.pbs script. Each directory was called dne2_&quot;stripe_count&quot;_mds. I set remote_dir=1 and remote_dir_gid=-1 so the normal user (as myself) could create the striped directory.&lt;br/&gt;
I did the follow before each test set using 4 as a example:&lt;/p&gt;

&lt;p&gt;lfs setdirstripe -c 4 --index=4 /lustre/sultan/stf008/scratch/jsimmons/dne2_4_mds&lt;br/&gt;
lfs setdirstripe -c 4 --index=4 -D /lustre/sultan/stf008/scratch/jsimmons/dne2_4_mds&lt;/p&gt;

&lt;p&gt;Then I ran the test. I used the -D options so the directories created by mdtest would be the same as dne2_X_mds. I also used the index to avoid filling up my MDS disk with inodes.&lt;/p&gt;</comment>
                            <comment id="130742" author="di.wang" created="Mon, 19 Oct 2015 05:53:01 +0000"  >&lt;p&gt;James: Could you please re-run the test with -F, so only do file test. I want to see if it can show liner performance improvement with only file operation. I suspect there are some interfere between cross-MDT and  single MDT operation. &lt;/p&gt;

&lt;p&gt;And also any reason you use &quot;--index=4 -D&quot; for default stripedEA? That means all of dir create request will be sent MDT4,  which might not be what you want? I would suggest remove --index=4 for default striped EA.  i.e. &lt;br/&gt;
lfs setdirstripe -c4 -D  /lustre/sultan/stf008/scratch/jsimmons/dne2_4_mds&lt;/p&gt;</comment>
                            <comment id="130840" author="adilger" created="Tue, 20 Oct 2015 02:42:00 +0000"  >&lt;p&gt;James, it also doesn&apos;t make sense to create huge numbers of regular files striped across all OSTs.  Applications should either create large numbers of files with one stripe per file (i.e. file per process) or create small numbers of widely striped files (i.e. shared single file).  Doing both at the same time is IMHO not testing what happens on most systems.&lt;/p&gt;

&lt;p&gt;Having large numbers of widely-striped files both stresses OST object creation rates, as well as slowing down the MDS because it needs to store an extra xattr for each file.  In your case with 1008 OSTs, this is actually creating two inodes per file on the MDT in order to store the large xattr (about 24KB/file).&lt;/p&gt;</comment>
                            <comment id="131011" author="simmonsja" created="Wed, 21 Oct 2015 14:51:52 +0000"  >&lt;p&gt;This is a test on a small system with only 56 OSTs. I can try the default stripe of 4 but I don&apos;t expect that much of difference, Our users tend to do one of two things. Use the default setting or do a lsf setstripe -c -1. Also the goal here was to how scaling behaved.&lt;/p&gt;

&lt;p&gt;DiWang our test system is under going a upgrade. It will be a few days before it is finished.&lt;/p&gt;</comment>
                            <comment id="132112" author="simmonsja" created="Thu, 29 Oct 2015 23:38:18 +0000"  >&lt;p&gt;We are in the process of installing perf on our test systems to analysis what is going on. I should have something next week.&lt;/p&gt;</comment>
                            <comment id="132510" author="simmonsja" created="Tue, 3 Nov 2015 17:30:58 +0000"  >&lt;p&gt;I have some good news and some bad news with my testing with perf installed. The good news is I&apos;m seeing much better performance so far with large MDS stripe count. I will start collecting new data soon and post it here. The bad news is when creating one million plus files I&apos;m seeing constant client eviction and reconnects due to time outs from the OSS. I will open a separate ticket for that.&lt;/p&gt;</comment>
                            <comment id="132571" author="simmonsja" created="Wed, 4 Nov 2015 00:06:06 +0000"  >&lt;p&gt;Sorry but I was wrong about performance fixes. My script had a error in it where the default stripe was 1 for mdtest.  I did do some profiling and I&apos;m not seeing anything hogging cycles. What I did see is this with slab top on my client:&lt;/p&gt;

&lt;p&gt; Active / Total Objects (% used)    : 549437 / 559744 (98.2%)&lt;br/&gt;
 Active / Total Slabs (% used)      : 67767 / 67768 (100.0%)&lt;br/&gt;
 Active / Total Caches (% used)     : 108 / 227 (47.6%)&lt;br/&gt;
 Active / Total Size (% used)       : 532288.63K / 534120.31K (99.7%)&lt;br/&gt;
 Minimum / Average / Maximum Object : 0.02K / 0.95K / 32768.00K&lt;/p&gt;

&lt;p&gt;372512 372485  99%    1.12K  53216        7    425728K lustre_inode_cache&lt;br/&gt;
 41552  41431  99%    0.03K    371      112      1484K size-32&lt;br/&gt;
 38600  38575  99%    0.19K   1930       20      7720K dentry&lt;br/&gt;
 15744  15740  99%    0.99K   3936        4     15744K nfs_inode_cache&lt;br/&gt;
 14400  14349  99%    0.12K    480       30      1920K size-128&lt;br/&gt;
 13098  12493  95%    0.06K    222       59       888K size-64&lt;/p&gt;
</comment>
                            <comment id="132576" author="di.wang" created="Wed, 4 Nov 2015 01:50:10 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Sorry but I was wrong about performance fixes.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So you mean &quot;seeing much better performance&quot; is not correct? will you redo the test? Actually I hope you can use Oprofile to profile MDS, so I can see which function or lock is being hit most, then know where is the bottleneck for this load. Thanks.&lt;/p&gt;</comment>
                            <comment id="133058" author="simmonsja" created="Mon, 9 Nov 2015 22:01:43 +0000"  >&lt;p&gt;Here is the perf data I gathered on one of the MDS being used:&lt;/p&gt;

&lt;p&gt;Samples: 136K of event &apos;cycles&apos;, Event count (approx.): 45672465691&lt;br/&gt;
   62.57%  ossec-syscheckd  ossec-syscheckd           &lt;span class=&quot;error&quot;&gt;&amp;#91;.&amp;#93;&lt;/span&gt; 0x0000000000027003&lt;br/&gt;
+   3.20%             init  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; intel_idle&lt;br/&gt;
+   1.57%          swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; intel_idle&lt;br/&gt;
+   0.88%  ossec-syscheckd  libc-2.12.so              &lt;span class=&quot;error&quot;&gt;&amp;#91;.&amp;#93;&lt;/span&gt; memcpy&lt;br/&gt;
+   0.78%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; native_flush_tlb_global&lt;br/&gt;
+   0.67%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; copy_user_generic_string&lt;br/&gt;
+   0.61%             init  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; poll_idle&lt;br/&gt;
+   0.36%             init  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; rb_erase&lt;br/&gt;
+   0.33%          swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; _spin_unlock_irqrestore&lt;br/&gt;
+   0.24%             init  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; _spin_unlock_irqrestore&lt;br/&gt;
+   0.22%             init  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; __hrtimer_start_range_ns&lt;br/&gt;
+   0.22%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; clear_page_c&lt;br/&gt;
+   0.21%          swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; rb_erase&lt;br/&gt;
+   0.21%  ossec-syscheckd  libc-2.12.so              &lt;span class=&quot;error&quot;&gt;&amp;#91;.&amp;#93;&lt;/span&gt; vfprintf&lt;br/&gt;
+   0.21%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; page_fault&lt;br/&gt;
+   0.18%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; _spin_lock&lt;br/&gt;
+   0.16%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; file_read_actor&lt;br/&gt;
+   0.15%          swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; __remove_hrtimer&lt;br/&gt;
+   0.15%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; rpcauth_lookup_credcache&lt;br/&gt;
+   0.14%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; __change_page_attr_set_clr&lt;br/&gt;
+   0.14%          swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; __hrtimer_start_range_ns&lt;br/&gt;
+   0.13%             init  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; schedule&lt;br/&gt;
+   0.13%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; lookup_address&lt;br/&gt;
+   0.13%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; __alloc_pages_nodemask&lt;br/&gt;
+   0.12%             init  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; tick_nohz_stop_sched_tick&lt;br/&gt;
+   0.11%  kiblnd_sd_00_00  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; find_busiest_group&lt;br/&gt;
+   0.11%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; kmem_cache_alloc&lt;br/&gt;
+   0.11%          swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; poll_idle&lt;br/&gt;
+   0.10%             init  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; cpuidle_idle_call&lt;br/&gt;
+   0.10%  ossec-syscheckd  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;         &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; free_hot_cold_page&lt;/p&gt;

&lt;p&gt;and the slab top gives:&lt;/p&gt;

&lt;p&gt; Active / Total Objects (% used)    : 8211619 / 11280489 (72.8%)&lt;br/&gt;
 Active / Total Slabs (% used)      : 511008 / 511020 (100.0%)&lt;br/&gt;
 Active / Total Caches (% used)     : 127 / 279 (45.5%)&lt;br/&gt;
 Active / Total Size (% used)       : 1746332.12K / 2151220.29K (81.2%)&lt;br/&gt;
 Minimum / Average / Maximum Object : 0.02K / 0.19K / 4096.00K&lt;/p&gt;

&lt;p&gt;  OBJS ACTIVE  USE OBJ SIZE  SLABS OBJ/SLAB CACHE SIZE NAME                   &lt;br/&gt;
5973206 5972436  99%    0.10K 161438       37    645752K buffer_head&lt;br/&gt;
1255968 255927  20%    0.12K  39249       32    156996K lod_obj&lt;br/&gt;
1255968 255927  20%    0.08K  26166       48    104664K mdd_obj&lt;br/&gt;
1255843 255927  20%    0.20K  66097       19    264388K mdt_obj&lt;br/&gt;
281643 281643 100%    1.02K  93881        3    375524K ldiskfs_inode_cache&lt;br/&gt;
255892 255853  99%    0.20K  13468       19     53872K osp_obj&lt;br/&gt;
219184 174975  79%    0.03K   1957      112      7828K size-32&lt;br/&gt;
175761 173927  98%    0.06K   2979       59     11916K size-64&lt;br/&gt;
156680 155264  99%    0.19K   7834       20     31336K dentry&lt;br/&gt;
147532 147478  99%    0.55K  21076        7     84304K radix_tree_node&lt;br/&gt;
110262 110257  99%    1.02K  36754        3    147016K nfs_inode_cache&lt;br/&gt;
 22914  22876  99%    0.64K   3819        6     15276K proc_inode_cache&lt;/p&gt;

&lt;p&gt;From this the MDS doesn&apos;t look to have too heavy of a burden. Its not cpu pegged nor no memory exhaustion.&lt;/p&gt;</comment>
                            <comment id="133060" author="simmonsja" created="Mon, 9 Nov 2015 22:04:36 +0000"  >&lt;p&gt;On the client side I see with perf and slab top:&lt;/p&gt;

&lt;p&gt;+     30.58%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; __schedule                                                             -&lt;br/&gt;
+     12.16%         pbs_mom  &lt;span class=&quot;error&quot;&gt;&amp;#91;unknown&amp;#93;&lt;/span&gt;                     &lt;span class=&quot;error&quot;&gt;&amp;#91;.&amp;#93;&lt;/span&gt; 0x7f198dcd6ea3                                                         0&lt;br/&gt;
+      6.21%           apsys  &lt;span class=&quot;error&quot;&gt;&amp;#91;unknown&amp;#93;&lt;/span&gt;                     &lt;span class=&quot;error&quot;&gt;&amp;#91;.&amp;#93;&lt;/span&gt; 0x407a76                                                               &#9618;&lt;br/&gt;
+      2.03%   ptlrpcd_00_02  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; __switch_to                                                            &#9618;&lt;br/&gt;
+      1.32%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; read_tsc                                                               &#9618;&lt;br/&gt;
+      1.04%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; load_balance                                                           &#9618;&lt;br/&gt;
+      0.93%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; sched_clock_local                                                      &#9618;&lt;br/&gt;
+      0.90%     kworker/3:1  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; rt_worker_func                                                         &#9618;&lt;br/&gt;
+      0.86%         pbs_mom  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; copy_pte_range                                                         &#9618;&lt;br/&gt;
+      0.74%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; default_idle                                                           &#9618;&lt;br/&gt;
+      0.73%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; run_timer_softirq                                                      &#9618;&lt;br/&gt;
+      0.63%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; trace_hardirqs_off                                                     &#9618;&lt;br/&gt;
+      0.58%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; ktime_get                                                              &#9618;&lt;br/&gt;
+      0.55%         pbs_mom  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; vsnprintf                                                              &#9618;&lt;br/&gt;
+      0.42%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; get_next_timer_interrupt                                               &#9618;&lt;br/&gt;
+      0.40%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; apic_timer_interrupt                                                   &#9618;&lt;br/&gt;
+      0.37%   ptlrpcd_00_00  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; _raw_spin_lock                                                         &#9618;&lt;br/&gt;
+      0.36%          munged  &lt;span class=&quot;error&quot;&gt;&amp;#91;unknown&amp;#93;&lt;/span&gt;                     &lt;span class=&quot;error&quot;&gt;&amp;#91;.&amp;#93;&lt;/span&gt; 0x409380                                                               &#9618;&lt;br/&gt;
+      0.34%           apsys  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; page_fault                                                             &#9618;&lt;br/&gt;
+      0.33%         pbs_mom  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; number                                                                 &#9618;&lt;br/&gt;
+      0.33%            nscd  &lt;span class=&quot;error&quot;&gt;&amp;#91;unknown&amp;#93;&lt;/span&gt;                     &lt;span class=&quot;error&quot;&gt;&amp;#91;.&amp;#93;&lt;/span&gt; 0x7ffb57c0866f                                                         &#9618;&lt;br/&gt;
+      0.32%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; amd_e400_idle                                                          &#9618;&lt;br/&gt;
+      0.32%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; _raw_spin_lock                                                         &#9618;&lt;br/&gt;
+      0.30%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; cpumask_next_and                                                       &#9618;&lt;br/&gt;
+      0.29%         pbs_mom  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; __do_fault                                                             &#9618;&lt;br/&gt;
+      0.29%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; rcu_exit_nohz                                                          &#9618;&lt;br/&gt;
+      0.28%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; x86_pmu_enable_all                                                     &#9618;&lt;br/&gt;
+      0.27%         swapper  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; native_sched_clock                                                     &#9618;&lt;br/&gt;
+      0.26%         pbs_mom  &lt;span class=&quot;error&quot;&gt;&amp;#91;kernel.kallsyms&amp;#93;&lt;/span&gt;             &lt;span class=&quot;error&quot;&gt;&amp;#91;k&amp;#93;&lt;/span&gt; do_task_stat                  &lt;/p&gt;

&lt;p&gt; Active / Total Objects (% used)    : 6238442 / 6248913 (99.8%)&lt;br/&gt;
 Active / Total Slabs (% used)      : 878492 / 878500 (100.0%)&lt;br/&gt;
 Active / Total Caches (% used)     : 108 / 227 (47.6%)&lt;br/&gt;
 Active / Total Size (% used)       : 6911389.05K / 6913362.17K (100.0%)&lt;br/&gt;
 Minimum / Average / Maximum Object : 0.02K / 1.11K / 32768.00K&lt;/p&gt;

&lt;p&gt;  OBJS ACTIVE  USE OBJ SIZE  SLABS OBJ/SLAB CACHE SIZE NAME                   &lt;br/&gt;
6038739 6038710  99%    1.12K 862677        7   6901416K lustre_inode_cache&lt;br/&gt;
 49820  49806  99%    0.19K   2491       20      9964K dentry&lt;br/&gt;
 48160  47939  99%    0.03K    430      112      1720K size-32&lt;br/&gt;
 18236  18232  99%    0.99K   4559        4     18236K nfs_inode_cache&lt;br/&gt;
 17040  16731  98%    0.12K    568       30      2272K size-128&lt;br/&gt;
 13806  13346  96%    0.06K    234       59       936K size-64&lt;br/&gt;
  7832   7790  99%    0.09K    178       44       712K sysfs_dir_cache&lt;br/&gt;
  7329   7309  99%    0.57K   1047        7      4188K inode_cache&lt;br/&gt;
  6440   6276  97%    0.19K    322       20      1288K size-192&lt;br/&gt;
  5159   2745  53%    0.05K     67       77       268K anon_vma_chain&lt;br/&gt;
  3976   3761  94%    0.50K    497        8      1988K size-512&lt;/p&gt;

&lt;p&gt;Here you can see the client nodes are pegged due to swapper running constantly due to the memory pressure on the client. The lustre_inode_cache is huge.&lt;/p&gt;</comment>
                            <comment id="133076" author="di.wang" created="Mon, 9 Nov 2015 23:24:16 +0000"  >&lt;p&gt;James: did this only happen when you do directory creation? or both file and directory creation? how many threads per client? Thanks&lt;/p&gt;

&lt;p&gt;Btw: did you rerun the test with &quot;-F&quot; and remove --index for default stripeEA as comment in Oct 18th. Thanks&lt;/p&gt;</comment>
                            <comment id="137035" author="simmonsja" created="Mon, 21 Dec 2015 15:09:10 +0000"  >&lt;p&gt;I did both file only operations and with directory operations.  I did tracked down the issue of directory operations as well. What is happening in that case is that the lustre inode cache is consuming all the memory on the client thus causing various timeout and client evictions and reconnects. This only happens for when many directory operations are performed. When only doing file operations the memory pressure issues go away. My latest testings as all been without the --index.&lt;/p&gt;</comment>
                            <comment id="137065" author="di.wang" created="Mon, 21 Dec 2015 18:24:07 +0000"  >&lt;p&gt;Hmm, according to the slab information in Nov 15th, it seems &quot;lustre_inode_cache&quot; is much more than &quot;inode_cache&quot;, so it means client has more ll_inode_info than inode,  hmm, maybe ll_inode_info is leaked somewhere. Do you still keep that client? Could you please get lru_size for me?&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;lctl get_param ldlm.*.*MDT*.lru_size
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt; </comment>
                            <comment id="137117" author="simmonsja" created="Mon, 21 Dec 2015 22:59:01 +0000"  >&lt;p&gt;Oh this doesn&apos;t look right.&lt;/p&gt;

&lt;p&gt;ldlm.namespaces.sultan-MDT0000-mdc-ffff8803f3d12c00.lru_size=29&lt;br/&gt;
ldlm.namespaces.sultan-MDT0001-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT0002-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT0003-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT0004-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT0005-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT0006-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT0007-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT0008-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT0009-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT000a-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT000b-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT000c-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT000d-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT000e-mdc-ffff8803f3d12c00.lru_size=0&lt;br/&gt;
ldlm.namespaces.sultan-MDT000f-mdc-ffff8803f3d12c00.lru_size=0&lt;/p&gt;</comment>
                            <comment id="166428" author="simmonsja" created="Mon, 19 Sep 2016 19:17:37 +0000"  >&lt;p&gt;Here is the final report of our results for our DNE2 performance analysis&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://info.ornl.gov/sites/publications/Files/Pub59510.pdf&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://info.ornl.gov/sites/publications/Files/Pub59510.pdf&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Enjoy the read. Perhaps we can link it to the wiki. If people want it linked to the wiki we can do that&lt;br/&gt;
and then close this ticket. If not we can keep this ticket open for a few more weeks so people can&lt;br/&gt;
have a chance to read this.&lt;/p&gt;</comment>
                            <comment id="166489" author="yujian" created="Tue, 20 Sep 2016 07:51:12 +0000"  >&lt;p&gt;Hi James,&lt;/p&gt;

&lt;p&gt;Thank you very much for the report!&lt;/p&gt;</comment>
                            <comment id="239865" author="pjones" created="Sat, 12 Jan 2019 04:02:24 +0000"  >&lt;p&gt;closing ancient ticket&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="31033">LU-6831</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="19249" name="1-mds-dne2-100k-dir-2.8.png" size="12650" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19250" name="1-mds-dne2-100k-files-2.8.png" size="10768" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19247" name="1-mds-dne2-10k-dir-2.8.png" size="13327" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19248" name="1-mds-dne2-10k-files-2.8.png" size="12496" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19253" name="2-mds-dne2-100k-dir-2.8.png" size="8564" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19254" name="2-mds-dne2-100k-files-2.8.png" size="10913" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19251" name="2-mds-dne2-10k-dir-2.8.png" size="10204" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19252" name="2-mds-dne2-10k-files-2.8.png" size="11283" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19257" name="4-mds-dne2-100k-dir-2.8.png" size="9727" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19258" name="4-mds-dne2-100k-files-2.8.png" size="13487" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19255" name="4-mds-dne2-10k-dir-2.8.png" size="11318" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19256" name="4-mds-dne2-10k-files-2.8.png" size="14052" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19261" name="8-mds-dne2-100k-dir-2.8.png" size="14690" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19262" name="8-mds-dne2-100k-files-2.8.png" size="15336" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19259" name="8-mds-dne2-10k-dir-2.8.png" size="13337" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19260" name="8-mds-dne2-10k-files-2.8.png" size="14443" author="simmonsja" created="Tue, 13 Oct 2015 22:45:13 +0000"/>
                            <attachment id="19263" name="mdtest-scale.pbs" size="953" author="simmonsja" created="Tue, 13 Oct 2015 23:31:34 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 21 Dec 2015 22:45:13 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10030" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic/Theme</customfieldname>
                        <customfieldvalues>
                                        <label>Performance</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxqe7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 13 Oct 2015 22:45:13 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>