<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:02:36 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13597] add processing time/latency, IO sizes, stats to job_stats</title>
                <link>https://jira.whamcloud.com/browse/LU-13597</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;It would be useful to add request processing times/latency and basic IO size information to the &lt;tt&gt;job_stats&lt;/tt&gt; output, similar to &lt;tt&gt;brw_stats&lt;/tt&gt; and other &lt;tt&gt;stats&lt;/tt&gt; files.&lt;/p&gt;

&lt;p&gt;This would allow monitoring per-job request processing performance, to allow differentiation between applications that are doing poorly-formed IO vs. jobs that may be suffering because of other jobs.&lt;/p&gt;</description>
                <environment></environment>
        <key id="59312">LU-13597</key>
            <summary>add processing time/latency, IO sizes, stats to job_stats</summary>
                <type id="4" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11310&amp;avatarType=issuetype">Improvement</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="emoly.liu">Emoly Liu</assignee>
                                    <reporter username="adilger">Andreas Dilger</reporter>
                        <labels>
                    </labels>
                <created>Sat, 23 May 2020 01:56:45 +0000</created>
                <updated>Fri, 11 Jun 2021 23:56:57 +0000</updated>
                            <resolved>Tue, 23 Jun 2020 13:01:10 +0000</resolved>
                                                    <fixVersion>Lustre 2.14.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="271557" author="pjones" created="Fri, 29 May 2020 19:04:17 +0000"  >&lt;p&gt;Emoly&lt;/p&gt;

&lt;p&gt;Could you please look into this?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="271564" author="adilger" created="Fri, 29 May 2020 21:22:54 +0000"  >&lt;p&gt;Emoly, implementing this should be similar to the change in patch &lt;a href=&quot;https://review.whamcloud.com/36078&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36078&lt;/a&gt; &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12631&quot; title=&quot;Report latency of client operations&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12631&quot;&gt;&lt;del&gt;LU-12631&lt;/del&gt;&lt;/a&gt; llite: report latency for filesystem ops&lt;/tt&gt;&quot; that changed over to using &lt;tt&gt;LPROCFS_TYPE_LATENCY&lt;/tt&gt; for the client stats. &lt;/p&gt;

&lt;p&gt;We should already be tracking the processing times in the OSS/MDS code, but they just need to be added into the per-job stats. &lt;/p&gt;</comment>
                            <comment id="271620" author="emoly.liu" created="Mon, 1 Jun 2020 10:18:04 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=adilger&quot; class=&quot;user-hover&quot; rel=&quot;adilger&quot;&gt;adilger&lt;/a&gt;,&lt;/p&gt;

&lt;p&gt;I saw that your patch at &lt;a href=&quot;https://review.whamcloud.com/#/c/33201/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/33201/&lt;/a&gt;&#160;had already added time information to ***_stats structures. Should I make this one based on that one?&lt;/p&gt;

&lt;p&gt;BTW, our current job_stats output is like:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;obdfilter.lustre-OST0000.job_stats=
job_stats:
- job_id:          cp.0
  snapshot_time:   1591005135
  read_bytes:      { samples:           0, unit: bytes, min:       0, max:       0, sum:               0 }
  write_bytes:     { samples:           1, unit: bytes, min:    1689, max:    1689, sum:            1689 }
  getattr:         { samples:           0, unit:  reqs }
  setattr:         { samples:           0, unit:  reqs }
  punch:           { samples:           0, unit:  reqs }
  sync:            { samples:           0, unit:  reqs }
  destroy:         { samples:           0, unit:  reqs }
  create:          { samples:           0, unit:  reqs }
  statfs:          { samples:           0, unit:  reqs }
  get_info:        { samples:           0, unit:  reqs }
  set_info:        { samples:           0, unit:  reqs }
  quotactl:        { samples:           0, unit:  reqs }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Besides adding time information and using LPROCFS_TYPE_LATENCY for all the requests, is there anything else that should be changed?&lt;/p&gt;</comment>
                            <comment id="271636" author="adilger" created="Mon, 1 Jun 2020 13:23:20 +0000"  >&lt;p&gt;The &lt;tt&gt;read_bytes&lt;/tt&gt; and &lt;tt&gt;write_bytes&lt;/tt&gt; would stay with &lt;tt&gt;bytes&lt;/tt&gt; units, but the others would change to &lt;tt&gt;usec&lt;/tt&gt; units and show min/max/sum values. It would also be good to add a separate &lt;tt&gt;usec&lt;/tt&gt; counter for the &lt;tt&gt;read&lt;/tt&gt; and &lt;tt&gt;write&lt;/tt&gt; operations, so that it is possible to calculate bandwidth. &lt;/p&gt;</comment>
                            <comment id="271825" author="emoly.liu" created="Wed, 3 Jun 2020 09:35:59 +0000"  >&lt;p&gt;The job_stats from ofd have been changed. The output is like:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;- job_id:          cp.0
  snapshot_time:   1591174981
  read_bytes:      { samples:           0, unit: bytes, min:       0, max:       0, sum:               0, sumsq:                  0 }
  write_bytes:     { samples:           1, unit: bytes, min:    2765, max:    2765, sum:            2765, sumsq:            7645225 }
  read_time:       { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  write_time:      { samples:           1, unit: usecs, min:      29, max:      29, sum:              29, sumsq:                841 }
  getattr:         { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  setattr:         { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  punch:           { samples:           1, unit: usecs, min:      49, max:      49, sum:              49, sumsq:               2401 }
  sync:            { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  destroy:         { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  create:          { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  statfs:          { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  get_info:        { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  set_info:        { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  quotactl:        { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  prealloc:        { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Do the ones from mdt need to be changed too?&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;mdt.lustre-MDT0000.job_stats=
job_stats:
- job_id:          cp.0
  snapshot_time:   1591175698
  open:            { samples:           1, unit:  reqs }
  close:           { samples:           1, unit:  reqs }
  mknod:           { samples:           1, unit:  reqs }
  link:            { samples:           0, unit:  reqs }
  unlink:          { samples:           0, unit:  reqs }
  mkdir:           { samples:           0, unit:  reqs }
  rmdir:           { samples:           0, unit:  reqs }
  rename:          { samples:           0, unit:  reqs }
  getattr:         { samples:           1, unit:  reqs }
  setattr:         { samples:           0, unit:  reqs }
  getxattr:        { samples:           1, unit:  reqs }
  setxattr:        { samples:           0, unit:  reqs }
  statfs:          { samples:           0, unit:  reqs }
  sync:            { samples:           0, unit:  reqs }
  samedir_rename:  { samples:           0, unit:  reqs }
  crossdir_rename: { samples:           0, unit:  reqs }
  read_bytes:      { samples:           0, unit:  reqs, min:       0, max:       0, sum:               0 }
  write_bytes:     { samples:           0, unit:  reqs, min:       0, max:       0, sum:               0 }
  punch:           { samples:           0, unit:  reqs }
  migrate:         { samples:           0, unit:  reqs }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="271831" author="emoly.liu" created="Wed, 3 Jun 2020 11:00:20 +0000"  >&lt;p&gt;Emoly Liu (emoly@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/38816&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/38816&lt;/a&gt;&lt;br/&gt;
Subject:&#160;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13597&quot; title=&quot;add processing time/latency, IO sizes, stats to job_stats&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13597&quot;&gt;&lt;del&gt;LU-13597&lt;/del&gt;&lt;/a&gt; ofd: add more information to job_stats&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: a9aff2fad827da1b1dc521ed29b001f003a4b30d&lt;/p&gt;</comment>
                            <comment id="271919" author="adilger" created="Thu, 4 Jun 2020 02:49:09 +0000"  >&lt;blockquote&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;  write_bytes:     { samples:           1, unit: bytes, min:    2765, max:    2765, sum:            2765, sumsq:            7645225 }
  write_time:      { samples:           1, unit: usecs, min:      29, max:      29, sum:              29, sumsq:                841 }
  punch:           { samples:           1, unit: usecs, min:      49, max:      49, sum:              49, sumsq:               2401 }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/blockquote&gt;
&lt;p&gt;This looks good, writing about 95MB/s...&lt;/p&gt;
&lt;blockquote&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;  read_bytes:      { samples:           0, unit:  reqs, min:       0, max:       0, sum:               0 }
  write_bytes:     { samples:           0, unit:  reqs, min:       0, max:       0, sum:               0 }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/blockquote&gt;
&lt;p&gt;This looks like a pre-existing bug - it should be units of &quot;bytes&quot; and not &quot;reqs&quot;.&lt;/p&gt;

&lt;p&gt;In any case, the MDS jobstats should also be updated to report usec.&lt;/p&gt;</comment>
                            <comment id="271944" author="emoly.liu" created="Thu, 4 Jun 2020 11:14:57 +0000"  >&lt;p&gt;The job_stats output of MDT is like:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;- job_id:          touch.500
  snapshot_time:   1591267859
  open:            { samples:           1, unit: usecs, min:     146, max:     146, sum:             146, sumsq:              21316 }
  close:           { samples:           1, unit: usecs, min:      17, max:      17, sum:              17, sumsq:                289 }
  mknod:           { samples:           1, unit: usecs, min:     124, max:     124, sum:             124, sumsq:              15376 }
  link:            { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  unlink:          { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  mkdir:           { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  rmdir:           { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  rename:          { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  getattr:         { samples:           1, unit: usecs, min:       8, max:       8, sum:               8, sumsq:                 64 }
  setattr:         { samples:           1, unit: usecs, min:      27, max:      27, sum:              27, sumsq:                729 }
  getxattr:        { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  setxattr:        { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  statfs:          { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  sync:            { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  samedir_rename:  { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  crossdir_rename: { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  read_bytes:      { samples:           0, unit: bytes, min:       0, max:       0, sum:               0, sumsq:                  0 }
  write_bytes:     { samples:           0, unit: bytes, min:       0, max:       0, sum:               0, sumsq:                  0 }
  punch:           { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
  migrate:         { samples:           0, unit: usecs, min:       0, max:       0, sum:               0, sumsq:                  0 }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Since the unit is changed from reqs to usecs, sometimes the sumsq looks big.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;setattr:         { samples:           1, unit: usecs, min:   15492, max:   15492, sum:           15492, sumsq:          240002064 }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="273550" author="gerrit" created="Tue, 23 Jun 2020 08:12:18 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/38816/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/38816/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13597&quot; title=&quot;add processing time/latency, IO sizes, stats to job_stats&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13597&quot;&gt;&lt;del&gt;LU-13597&lt;/del&gt;&lt;/a&gt; ofd: add more information to job_stats&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: cd8fb1e8d300c0622200d6b25d187d8d0472e79f&lt;/p&gt;</comment>
                            <comment id="273576" author="pjones" created="Tue, 23 Jun 2020 13:01:10 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="56581">LU-12631</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="59714">LU-13716</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="59797">LU-13733</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="35543">LU-7909</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="53365">LU-11407</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="57177">LU-12872</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i0115b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>