<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:05:58 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13997] sanity test_418: df and lfs df blocks output mismatch</title>
                <link>https://jira.whamcloud.com/browse/LU-13997</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for wangshilong &amp;lt;wshilong@ddn.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/f01e487a-628b-41e2-9646-67794dd75816&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/f01e487a-628b-41e2-9646-67794dd75816&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_418 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&apos;df and lfs df inodes output mismatch:  df -i: 10.9.4.8@tcp:/lustre 916924 14005 902919 2% /mnt/lustre,  lfs df -i: filesystem_summary: 917922 15003 902919 2% /mnt/lustre&apos;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;&amp;lt;&amp;lt;Please provide additional information about the failure here&amp;gt;&amp;gt;&lt;/p&gt;





&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
sanity test_418 - df and lfs df blocks output mismatch:  df -i: 10.9.4.8@tcp:/lustre 916924 14005 902919 2% /mnt/lustre,  lfs df -i: filesystem_summary: 917922 15003 902919 2% /mnt/lustre&apos;&lt;/p&gt;</description>
                <environment></environment>
        <key id="60949">LU-13997</key>
            <summary>sanity test_418: df and lfs df blocks output mismatch</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="4" iconUrl="https://jira.whamcloud.com/images/icons/statuses/reopened.png" description="This issue was once resolved, but the resolution was deemed incorrect. From here issues are either marked assigned or resolved.">Reopened</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>always_except</label>
                    </labels>
                <created>Fri, 25 Sep 2020 14:13:58 +0000</created>
                <updated>Thu, 2 Dec 2021 16:55:07 +0000</updated>
                                            <version>Lustre 2.15.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>10</watches>
                                                                            <comments>
                            <comment id="308628" author="bzzz" created="Wed, 28 Jul 2021 04:21:23 +0000"  >&lt;p&gt; on master: &lt;a href=&quot;https://testing.whamcloud.com/test_sessions/7efffeb4-d38b-4623-843a-bb9823e14897&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/7efffeb4-d38b-4623-843a-bb9823e14897&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="308645" author="bzzz" created="Wed, 28 Jul 2021 11:23:37 +0000"  >&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sets/04bd7c7f-cce4-4f68-adfd-81cbfa3062d0&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/04bd7c7f-cce4-4f68-adfd-81cbfa3062d0&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="308739" author="adilger" created="Thu, 29 Jul 2021 06:47:11 +0000"  >&lt;p&gt;+1 on master &lt;a href=&quot;https://testing.whamcloud.com/test_sets/82e1130a-47d7-4062-ac9e-cd13d44b3b7a&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/82e1130a-47d7-4062-ac9e-cd13d44b3b7a&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="308928" author="emoly.liu" created="Fri, 30 Jul 2021 14:41:54 +0000"  >&lt;p&gt;+1 on master&#160;&lt;a href=&quot;https://testing.whamcloud.com/test_sets/5d0de25e-6cbb-46a8-9c75-2bbd5c350945&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/5d0de25e-6cbb-46a8-9c75-2bbd5c350945&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="310972" author="hornc" created="Tue, 24 Aug 2021 13:38:17 +0000"  >&lt;p&gt;+1 on master &lt;a href=&quot;https://testing.whamcloud.com/test_sets/253d6740-d5c2-4113-8765-731985896f6b&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/253d6740-d5c2-4113-8765-731985896f6b&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="311489" author="qian_wc" created="Sat, 28 Aug 2021 14:51:39 +0000"  >&lt;p&gt;+1 on ES6 &lt;a href=&quot;https://testing.whamcloud.com/test_sets/5635e7ed-ef1a-4c8b-b11b-d3b4d76a91fb&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/5635e7ed-ef1a-4c8b-b11b-d3b4d76a91fb&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="311499" author="bzzz" created="Mon, 30 Aug 2021 04:45:59 +0000"  >&lt;p&gt;I noticed that the check_lfs_df() function doesn&apos;t wait at all, while it should wait to let cached data expire?&lt;/p&gt;</comment>
                            <comment id="311616" author="adilger" created="Mon, 30 Aug 2021 21:26:13 +0000"  >&lt;p&gt;I see there is a &quot;&lt;tt&gt;sleep 0.2&lt;/tt&gt;&quot; inside the loop to wait until there is an update:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
        &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; count in {1..100}; &lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt;
                cancel_lru_locks
                sync; sleep 0.2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="311767" author="adilger" created="Wed, 1 Sep 2021 00:20:15 +0000"  >&lt;p&gt;While this issue has been open since 2020-09-25, it really only started failing after 2021-07-20.  There was one failure on 2021-07-20, and another single failure on 2021-07-22, then between 4-10 per day since 2021-07-25.  Patches landed in that date range are:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;git log --oneline --before 2021-07-25 --after 2021-07-19
233344d451 LU-13417 test: generate uneven MDTs early for sanity 413
e90794af4b LU-14868 llite: revert &apos;simplify callback handling for async getattr&apos;
cbc62b0b82 LU-14826 mdt: getattr_name(&quot;..&quot;) under striped directory
6a4be282bb LU-14833 sec: quiet spurious gss_init_svc_upcall() message
f75ff33d9f LU-14114 lnet: print device status in net show command
601c48f3ec LU-14804 nodemap: do not return error for improper ACL
188112fc80 LU-14300 quota: avoid nested lqe lookup
9f672d8a71 LU-14508 lfs: make mirror operations preserve timestamps
253a9e3bcd LU-12214 build: fix SLES build/install
9e1e29406c LU-12022 tests: error on resync failure sanity-flr
6cba1b949d LU-6142 libcfs: checkpatch cleanup of libcfs fail.c
9976d2c35d LU-6142 lnet: convert kiblnd/ksocknal_thread_start to vararg
a39f078041 LU-6142 lnet: discard lnet_current_net_count
a3f5aa624b LU-14217 osd-zfs: allow SEEK_HOLE/DATA only with sync
0f8db7e06a LU-14805 llite: No locked parallel DIO
1dbe63301b LU-13440 utils: fix handling of lsa_stripe_off = -1
f2a16793fa LU-14541 llite: avoid stale data reading
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;My guess is that &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13417&quot; title=&quot;DNE3: mkdir() automatically create remote directory on MDS which has more space&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13417&quot;&gt;&lt;del&gt;LU-13417&lt;/del&gt;&lt;/a&gt; test: generate uneven MDTs early for sanity 413&lt;/tt&gt;&quot; or &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13440&quot; title=&quot;DNE3: limit directory default layout inheritance&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13440&quot;&gt;&lt;del&gt;LU-13440&lt;/del&gt;&lt;/a&gt; utils: fix handling of lsa_stripe_off = -1&lt;/tt&gt;&quot; causing some kind of difference in space usage for DNE that only shows up in &quot;&lt;tt&gt;lfs df -i&lt;/tt&gt;&quot; (e.g. many remote entries), or possibly stray file/object deletions still happening in the background?&lt;/p&gt;</comment>
                            <comment id="311781" author="bzzz" created="Wed, 1 Sep 2021 07:20:48 +0000"  >&lt;p&gt;I see in the logs the following lines, repeating many times during 418:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
00000004:00000024:1.0:1630450218.589991:0:617469:0:(mdt_handler.c:547:mdt_statfs()) blocks cached 10731520 granted 354615296 pending 0 free 1165684736 avail 1060012032
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;i.e. tgd_tot_dirty= 10731520&lt;br/&gt;
it looks like cancel_lru_locks in check_lfs_df() doesn&apos;t flush dirty DoM pages? or the dirty pages are on another client?&lt;/p&gt;
</comment>
                            <comment id="311785" author="gerrit" created="Wed, 1 Sep 2021 08:54:45 +0000"  >&lt;p&gt;&quot;Alex Zhuravlev &amp;lt;bzzz@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/44803&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/44803&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13997&quot; title=&quot;sanity test_418: df and lfs df blocks output mismatch&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13997&quot;&gt;LU-13997&lt;/a&gt; tests: sanity/418 to cancel all client locks&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 37505dfa7cd3a24e78c07be155481217a2258866&lt;/p&gt;</comment>
                            <comment id="312087" author="adilger" created="Fri, 3 Sep 2021 18:46:19 +0000"  >&lt;p&gt;This is failing about 1/25 of the test runs this week (31/695).&lt;/p&gt;</comment>
                            <comment id="312115" author="gerrit" created="Sat, 4 Sep 2021 12:26:13 +0000"  >&lt;p&gt;&quot;Alex Zhuravlev &amp;lt;bzzz@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/44845&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/44845&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13997&quot; title=&quot;sanity test_418: df and lfs df blocks output mismatch&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13997&quot;&gt;LU-13997&lt;/a&gt; tests: sanity/418 reproducer&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 2aa6902a3dd18e9f4b97fafa4e4f82c6bd6c258d&lt;/p&gt;</comment>
                            <comment id="312492" author="adilger" created="Fri, 10 Sep 2021 17:59:28 +0000"  >&lt;p&gt;I&apos;m not sure if this is relevant or not, but it appears that this problem hits much more frequently on ldiskfs than ZFS.  For ZFS there were 3/220 failures (1/73) in the past 4 weeks, and no failures in the past 2 weeks.  For ldiskfs, there were 84 / 1403 failures (1/16) with 35 in the past week (including the 95 test passes for the two patches for this issue).&lt;/p&gt;</comment>
                            <comment id="312543" author="adilger" created="Sat, 11 Sep 2021 07:28:39 +0000"  >&lt;p&gt;Alex wrote:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;I noticed the following message on MDT, repeating many times:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000004:00000024:1.0:1630450218.589991:0:617469:0:(mdt_handler.c:547:mdt_statfs())
   blocks cached 10731520 granted 354615296 pending 0 free 1165684736 avail 1060012032
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;where cached are blocks reported &quot;dirty&quot; by the client; my understanding is that it&apos;s DoM data to be flushed&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Interesting.  Looking at &lt;tt&gt;mdt_statfs()&lt;/tt&gt;, it subtracts (&lt;tt&gt;tgd-&amp;gt;tgd_tot_dirty + tgd-&amp;gt;tgd_tot_pending&lt;/tt&gt;) from &lt;tt&gt;os_bavail&lt;/tt&gt; before returning it to the client, but when the clients do &quot;&lt;tt&gt;lfs df&lt;/tt&gt;&quot; they ignore the MDT &lt;tt&gt;os_bavail&lt;/tt&gt; value when calculating available space, and only OST &lt;tt&gt;os_bavail&lt;/tt&gt; is used (for better or worse), and that is the inconsistent value...&lt;/p&gt;

&lt;p&gt;This really points to some other bug, that MDC/MDT grant is not calculated correctly, or that dirty DoM files are not being flushed from the clients properly, or something, since test_418 is waiting at least 20s for the output to stabilize, plus another 50s or more for test_414..test_417.&lt;/p&gt;

&lt;p&gt;It looks like &lt;tt&gt;test_413a-&amp;gt;generate_uneven_mdts()&lt;/tt&gt; is creating a lot of DoM files, and test_413z is deleting them, since v2_14_52-97-g233344d451 on master (2021-07-25).  This aligns with the start of high-frequency test_418 failures on master.  In fact, the first &quot;recent&quot; failure was 2021-07-23 on patch &lt;a href=&quot;https://review.whamcloud.com/44384&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/44384&lt;/a&gt; &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13417&quot; title=&quot;DNE3: mkdir() automatically create remote directory on MDS which has more space&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13417&quot;&gt;&lt;del&gt;LU-13417&lt;/del&gt;&lt;/a&gt; test: generate uneven MDTs early for sanity 413&lt;/tt&gt;&quot; &lt;b&gt;before it landed&lt;/b&gt;, which is the patch that introduced &lt;tt&gt;generate_uneven_mdts()&lt;/tt&gt;.  Before that landed test_418 only failed 0/1 times per month.&lt;/p&gt;

&lt;p&gt;So it looks like patch 44803 may avoid this test failure, but there is something strange happening with dirty DoM files...&lt;/p&gt;</comment>
                            <comment id="312546" author="bzzz" created="Sat, 11 Sep 2021 07:41:26 +0000"  >&lt;p&gt;check_lfs_df() function cancels only local locks, but iirc sanity may use two clients?&lt;/p&gt;</comment>
                            <comment id="312657" author="adilger" created="Mon, 13 Sep 2021 20:05:10 +0000"  >&lt;blockquote&gt;
&lt;p&gt;check_lfs_df() function cancels only local locks, but iirc sanity may use two clients?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Sure, but the dirty DoM data should be flushed by itself far sooner than 60-70s since the files were written.  I suppose it might be possible that the data is actually written, but the client did not generate any more RPCs, so the &lt;tt&gt;tgd_tot_dirty&lt;/tt&gt; was not updated on the server since the last write?  If that is the case, then the fix in patch 44803 is the right one and the problem is only in the test.&lt;/p&gt;

&lt;p&gt;I just want to make sure we aren&apos;t missing some other problem, where the DoM file writes are never being sent to the MDT unless the lock is cancelled.  Similar bugs have happened in the past, and writes from minutes/hours earlier are lost if the client is evicted or fails.  It would be possible to create a test that writes some DoM files, waits 60s (2x the &lt;tt&gt;vm.dirty_expire_centisecs=3000&lt;/tt&gt; interval for data, and 12x the &lt;tt&gt;vm.dirty_writeback_centisecs=500&lt;/tt&gt; interval for metadata), evicts the client, and then checks that the files contain valid data.&lt;/p&gt;</comment>
                            <comment id="312729" author="bzzz" created="Tue, 14 Sep 2021 13:30:50 +0000"  >&lt;p&gt;here is a simple script...&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
+       &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; ((i=0; i &amp;lt; 100; i++)); &lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt;
+               $LFS setstripe -E 1M -L mdt $DIR/$tfile-$i || error &lt;span class=&quot;code-quote&quot;&gt;&quot;can&apos;t setstripe&quot;&lt;/span&gt;
+               dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/dev/zero of=$DIR/$tfile-$i bs=128k count=1 || error &lt;span class=&quot;code-quote&quot;&gt;&quot;can&apos;t dd&quot;&lt;/span&gt;
+       done
+       start=$SECONDS
+       &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; ((i=0; i &amp;lt; 100; i++)); &lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt;
+               grant_dirty=$($LCTL get_param -n  mdt.*.tot_dirty |
+                       awk &lt;span class=&quot;code-quote&quot;&gt;&apos;{sum=sum+$1} END {print sum}&apos;&lt;/span&gt;)
+               vm_dirty=$(awk &lt;span class=&quot;code-quote&quot;&gt;&apos;/Dirty:/{print $2}&apos;&lt;/span&gt; /proc/meminfo)
+               echo &lt;span class=&quot;code-quote&quot;&gt;&quot;dirty after $((SECONDS-start)): $grant_dirty in grants, $vm_dirty in system&quot;&lt;/span&gt;
+               sleep 2
+               (( vm_dirty == 0 )) &amp;amp;&amp;amp; &lt;span class=&quot;code-keyword&quot;&gt;break&lt;/span&gt;
+       done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;... and its output:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
dirty after 0: 1966080 in grants, 12292 in system
dirty after 2: 1966080 in grants, 12392 in system
dirty after 4: 1966080 in grants, 12392 in system
dirty after 6: 1966080 in grants, 12424 in system
dirty after 8: 1966080 in grants, 12176 in system
dirty after 10: 1966080 in grants, 12176 in system
dirty after 12: 1966080 in grants, 12176 in system
dirty after 14: 1966080 in grants, 12176 in system
Lustre: lustre-OST0000-osc-ffff8801ee82e000: disconnect after 22s idle
dirty after 16: 1966080 in grants, 11712 in system
dirty after 18: 1966080 in grants, 11648 in system
dirty after 20: 1966080 in grants, 11648 in system
dirty after 22: 1966080 in grants, 11648 in system
dirty after 24: 1966080 in grants, 11664 in system
dirty after 26: 1966080 in grants, 11664 in system
dirty after 28: 1966080 in grants, 11664 in system
dirty after 30: 3579904 in grants, 2064 in system
dirty after 32: 3579904 in grants, 2064 in system
dirty after 34: 155648 in grants, 440 in system
dirty after 37: 155648 in grants, 416 in system
dirty after 39: 155648 in grants, 416 in system
dirty after 41: 155648 in grants, 416 in system
dirty after 43: 155648 in grants, 504 in system
dirty after 45: 155648 in grants, 16 in system
dirty after 47: 155648 in grants, 16 in system
dirty after 49: 155648 in grants, 16 in system
dirty after 51: 155648 in grants, 16 in system
dirty after 53: 155648 in grants, 0 in system
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;i.e. all dirty pages had been flushed, but MDT still counts few pages dirty&lt;/p&gt;</comment>
                            <comment id="312809" author="adilger" created="Tue, 14 Sep 2021 23:27:52 +0000"  >&lt;blockquote&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;:
dirty after 53: 155648 in grants, 0 in system
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;all dirty pages had been flushed, but MDT still counts few pages dirty&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Thanks for testing this. It looks like the test with your patch should fix this problem.&lt;/p&gt;</comment>
                            <comment id="312825" author="bzzz" created="Wed, 15 Sep 2021 04:55:50 +0000"  >&lt;p&gt;actually now I doubt the patch will really help, as nothing I tried (cancel_lru_locks, extra RPC, waiting some time) changes tot_dirty on the MDT side.&lt;/p&gt;</comment>
                            <comment id="312840" author="bzzz" created="Wed, 15 Sep 2021 08:41:14 +0000"  >&lt;p&gt;the problem is that grants info is passed with OST_READ/OST_WRITE, so even with no actual dirty pages on the client side the MDT doesn&apos;t get any updates and keeps tgd_tot_dirty &amp;gt; 0, which in turn affects the statfs calculation.&lt;/p&gt;</comment>
                            <comment id="312842" author="bzzz" created="Wed, 15 Sep 2021 08:46:53 +0000"  >&lt;p&gt;another observation is that grant shrink seems to happen (and be handled) for OSC-OST only.&lt;/p&gt;</comment>
                            <comment id="312974" author="adilger" created="Thu, 16 Sep 2021 00:17:32 +0000"  >&lt;p&gt;We need a separate patch (separate ticket?) for grant shrink to the MDTs.&lt;/p&gt;</comment>
                            <comment id="312990" author="bzzz" created="Thu, 16 Sep 2021 04:18:48 +0000"  >&lt;p&gt;but the grant shrinking mechanism has its own limits too:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
        /* Don&apos;t shrink &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; we are already above or below the desired limit
         * We don&apos;t want to shrink below a single RPC, as that will negatively
         * impact block allocation and &lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;-term performance. */
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (target_bytes &amp;lt; cli-&amp;gt;cl_max_pages_per_rpc &amp;lt;&amp;lt; PAGE_SHIFT)
                target_bytes = cli-&amp;gt;cl_max_pages_per_rpc &amp;lt;&amp;lt; PAGE_SHIFT;

        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (target_bytes &amp;gt;= cli-&amp;gt;cl_avail_grant) {
                spin_unlock(&amp;amp;cli-&amp;gt;cl_loi_list_lock);
                RETURN(0);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;add an exception for MDT?&lt;/p&gt;</comment>
                            <comment id="312997" author="adilger" created="Thu, 16 Sep 2021 06:47:55 +0000"  >&lt;blockquote&gt;
&lt;p&gt;add an exception for MDT?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;No, I&apos;d rather maintain good performance than make &quot;df&quot; totally consistent across all the clients/MDTs.  Having &lt;em&gt;some&lt;/em&gt; grant shrinking on the MDTs would still be desirable, but is outside the scope of this ticket.  I filed &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15010&quot; title=&quot;grant shrink for MDTs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15010&quot;&gt;&lt;del&gt;LU-15010&lt;/del&gt;&lt;/a&gt; for this.&lt;/p&gt;</comment>
                            <comment id="313174" author="gerrit" created="Fri, 17 Sep 2021 14:07:01 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/44803/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/44803/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13997&quot; title=&quot;sanity test_418: df and lfs df blocks output mismatch&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13997&quot;&gt;LU-13997&lt;/a&gt; tests: sanity/418 to cancel all client locks&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 9e8ee5aa0339b06aa53dc00f4911e2019a3ef3a0&lt;/p&gt;</comment>
                            <comment id="313975" author="adilger" created="Sat, 25 Sep 2021 16:37:36 +0000"  >&lt;p&gt;It looks like this is still failing on master, for example:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/2121a4d8-1472-473c-875c-4b3b1288b726&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/2121a4d8-1472-473c-875c-4b3b1288b726&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="314038" author="hornc" created="Mon, 27 Sep 2021 15:22:58 +0000"  >&lt;p&gt;+1 on master - &lt;a href=&quot;https://testing.whamcloud.com/test_sets/1c8664ee-840b-453d-bbcd-c80e7b75d7d4&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/1c8664ee-840b-453d-bbcd-c80e7b75d7d4&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="314041" author="bzzz" created="Mon, 27 Sep 2021 15:40:19 +0000"  >&lt;p&gt;I hope &lt;a href=&quot;https://review.whamcloud.com/#/c/44956/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/44956/&lt;/a&gt; can help&lt;/p&gt;</comment>
                            <comment id="314975" author="hornc" created="Thu, 7 Oct 2021 21:36:13 +0000"  >&lt;p&gt;+1 on master - &lt;a href=&quot;https://testing.whamcloud.com/test_sets/302baca3-ea35-4824-a7fe-d4a6995111d7&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/302baca3-ea35-4824-a7fe-d4a6995111d7&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="315483" author="gerrit" created="Wed, 13 Oct 2021 20:36:48 +0000"  >&lt;p&gt;&quot;Andreas Dilger &amp;lt;adilger@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/45231&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45231&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13997&quot; title=&quot;sanity test_418: df and lfs df blocks output mismatch&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13997&quot;&gt;LU-13997&lt;/a&gt; tests: fix sanity test_418 lock cancellation&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e4bb910bba91199af21199080c5f3eb1070c59c9&lt;/p&gt;</comment>
                            <comment id="316619" author="gerrit" created="Tue, 26 Oct 2021 23:42:30 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/45231/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45231/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13997&quot; title=&quot;sanity test_418: df and lfs df blocks output mismatch&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13997&quot;&gt;LU-13997&lt;/a&gt; tests: fix sanity test_418 lock cancellation&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: c0596c4d8af3bce6565c0e16a1469fcc65191404&lt;/p&gt;</comment>
                            <comment id="316648" author="pjones" created="Wed, 27 Oct 2021 03:45:18 +0000"  >&lt;p&gt;Landed for 2.15&lt;/p&gt;</comment>
                            <comment id="316665" author="adilger" created="Wed, 27 Oct 2021 04:02:29 +0000"  >&lt;p&gt;The landed patch only avoids the test failure, it does not actually fix it, which is why it is marked &lt;tt&gt;always_except&lt;/tt&gt; for now. &lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="66649">LU-15105</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="57779">LU-13125</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="66078">LU-15010</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01atb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>