<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:12:01 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14699] changelog garbage collection is too lax</title>
                <link>https://jira.whamcloud.com/browse/LU-14699</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;The changelog garbage collection enabled by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12871&quot; title=&quot;enable changelog garbage collection by default&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12871&quot;&gt;&lt;del&gt;LU-12871&lt;/del&gt;&lt;/a&gt; is too lazy.  It will only purge an idle changelog user and its records if the changelog itself is nearly full:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (likely(mdd-&amp;gt;mdd_changelog_gc &amp;amp;&amp;amp;
                     mdd-&amp;gt;mdd_cl.mc_gc_task == MDD_CHLG_GC_NONE &amp;amp;&amp;amp;
                     ktime_get_real_seconds() - mdd-&amp;gt;mdd_cl.mc_gc_time &amp;gt;
                        mdd-&amp;gt;mdd_changelog_min_gc_interval)) {
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (unlikely(llog_cat_free_space(ctxt-&amp;gt;loc_handle) &amp;lt;=
                             mdd-&amp;gt;mdd_changelog_min_free_cat_entries ||
                             OBD_FAIL_CHECK(OBD_FAIL_FORCE_GC_THREAD))) {
                        CWARN(&lt;span class=&quot;code-quote&quot;&gt;&quot;%s:%s low on changelog_catalog free entries, &quot;&lt;/span&gt;
                              &lt;span class=&quot;code-quote&quot;&gt;&quot;starting ChangeLog garbage collection thread\n&quot;&lt;/span&gt;,
                              obd-&amp;gt;obd_name,
                              OBD_FAIL_CHECK(OBD_FAIL_FORCE_GC_THREAD) ?
                                &lt;span class=&quot;code-quote&quot;&gt;&quot; simulate&quot;&lt;/span&gt; : &quot;&quot;);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The default &lt;tt&gt;mdd_changelog_min_free_cat_entries=2&lt;/tt&gt; and &lt;tt&gt;mdd_changelog_min_gc_interval=3600&lt;/tt&gt; so it will only check every hour if the changelog is within 2x65000 = 130000 entries of overflowing (out of ~4B entries), even if the changelog has been idle for weeks (with reduced settings, just to verify it is not evicted):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lctl get_param mdd.*.changelog*
mdd.myth-MDT0000.changelog_deniednext=60
mdd.myth-MDT0000.changelog_gc=1
mdd.myth-MDT0000.changelog_max_idle_indexes=20800000
mdd.myth-MDT0000.changelog_max_idle_time=2500000
mdd.myth-MDT0000.changelog_min_free_cat_entries=2
mdd.myth-MDT0000.changelog_min_gc_interval=3600
mdd.myth-MDT0000.changelog_size=3857464008
mdd.myth-MDT0000.changelog_mask=
MARK CREAT MKDIR HLINK SLINK MKNOD UNLNK RMDIR RENME RNMTO CLOSE LYOUT TRUNC SATTR XATTR HSM MTIME CTIME MIGRT FLRW RESYNC
mdd.myth-MDT0000.changelog_users=
current index: 98130425
ID    index (idle seconds)
cl3   77315666 (2512315)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It would be better to evict idle changelog users after a week or two, which is plenty of time to get a broken consumer working again, even if the log isn&apos;t totally full.&lt;/p&gt;</description>
                <environment></environment>
        <key id="64369">LU-14699</key>
            <summary>changelog garbage collection is too lax</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="tappro">Mikhail Pershin</assignee>
                                    <reporter username="adilger">Andreas Dilger</reporter>
                        <labels>
                    </labels>
                <created>Sat, 22 May 2021 00:04:06 +0000</created>
                <updated>Thu, 30 Nov 2023 15:42:26 +0000</updated>
                            <resolved>Tue, 30 Nov 2021 13:56:07 +0000</resolved>
                                                    <fixVersion>Lustre 2.15.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>12</watches>
                                                                            <comments>
                            <comment id="302804" author="adilger" created="Thu, 27 May 2021 16:31:21 +0000"  >&lt;p&gt;I think rather than having a hard age/record cutoff (both of which may go wrong on certain cases) that we instead check &quot;(records x hours &amp;gt; (1&amp;lt;&amp;lt;32))&quot; or similar (in addition to the current &quot;out of space&quot; check). That will handle cases with very old users on systems with few records (30 days x 6M records), or less idle (7 days x 25M records).&lt;/p&gt;</comment>
                            <comment id="302807" author="adilger" created="Thu, 27 May 2021 16:36:32 +0000"  >&lt;p&gt;For users that cannot be evicted (eg. strict audit users) it would be desirable to allow a flag on a per-user basis (default off) that prevents that user from being garbage collected. &lt;/p&gt;</comment>
                            <comment id="312375" author="tappro" created="Thu, 9 Sep 2021 13:05:18 +0000"  >&lt;p&gt;I am not sure about the following thing - consider there is user supposed to stay long and doing changelog purging by itself, so changelog is not too big. Nowadays&#160; it will stay forever doing its job. With new approach it will be dropped after &lt;tt&gt;changelog_max_idle_time&lt;/tt&gt; being considered as &apos;idle&apos;. I don&apos;t think this is expected behavior on our side. You said we could prevent users from aggressive GC when needed, but that would mean we have to consider all old users as such just because we don&apos;t know their status. That means such users must be deregistered manually after all.&lt;/p&gt;

&lt;p&gt;As for the flag itself, for the same compatibility reason it should be a flag which allows aggressive GC for a user; the question is what behavior is the default when a user registers, e.g. whether a &lt;tt&gt;&amp;#45;&amp;#45;aggressive_gc&lt;/tt&gt; or a &lt;tt&gt;&amp;#45;&amp;#45;disable_gc&lt;/tt&gt; option is to be introduced&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="312403" author="adilger" created="Thu, 9 Sep 2021 20:14:25 +0000"  >&lt;p&gt;The Changelog is not meant to be a lifelong history of all events that have happened to the filesystem, but rather an improvement over &quot;inotify&quot; and similar &quot;watch for current/recent events in the filesystem&quot; that only work if some process is actively watching the filesystem every second.  Changelog persistence is mainly to avoid short-term problems if the consumer crashes, or the MDS crashes, but should not be expected to work after weeks or months of idle time.  At that point, it is faster to just scan the whole MDT again instead of processing a billion Changelog records, and every Changelog consumer would have to be able to do full filesystem scans to start off anyway.&lt;/p&gt;

&lt;p&gt;We&apos;ve had &lt;b&gt;lots&lt;/b&gt; of issues with users registering changelog users, and then forgetting them, registering them multiple times and leaving the old ones idle, etc.  This results in either the MDT becoming full, or the Changelog becoming full, and preventing &lt;b&gt;all&lt;/b&gt; MDT operations from succeeding.   The duplicate user registration will be helped by patch &lt;a href=&quot;https://review.whamcloud.com/43380&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/43380&lt;/a&gt; &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13055&quot; title=&quot;add ability for named Changelog consumers&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13055&quot;&gt;&lt;del&gt;LU-13055&lt;/del&gt;&lt;/a&gt; mdd: per-user changelog names and mask&lt;/tt&gt;&quot;, but that isn&apos;t the only reason why idle Changelog users exist, and it will be a while before named users are created by all applications.&lt;/p&gt;

&lt;p&gt;I think there are a few important changes needed.  The current GC doesn&apos;t appear to work properly at all (current stats on 2.14.0 server):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# # lctl get_param mdd.*.changelog*
mdd.myth-MDT0000.changelog_deniednext=60
mdd.myth-MDT0000.changelog_gc=1
mdd.myth-MDT0000.changelog_max_idle_indexes=20971520
mdd.myth-MDT0000.changelog_max_idle_time=2592000
mdd.myth-MDT0000.changelog_min_free_cat_entries=2
mdd.myth-MDT0000.changelog_min_gc_interval=30
mdd.myth-MDT0000.changelog_size=3884936000
mdd.myth-MDT0000.changelog_mask=
MARK CREAT MKDIR HLINK SLINK MKNOD UNLNK RMDIR RENME RNMTO CLOSE LYOUT TRUNC SATTR XATTR HSM MTIME CTIME MIGRT FLRW RESYNC 
mdd.myth-MDT0000.changelog_users=
current index: 98287389
ID    index (idle seconds)
cl3   77315666 (12082313)
# echo $((98287389 - 77315666))
20971723
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;This has been exceeding &lt;tt&gt;mdd.&amp;#42;.changelog_max_idle_time=2592000&lt;/tt&gt; for months without the user being removed.  I also just reduced &lt;tt&gt;mdd.&amp;#42;.changelog_max_idle_indexes=20971520&lt;/tt&gt; and &lt;tt&gt;mdd.&amp;#42;.changelog_min_gc_interval=30&lt;/tt&gt; and created enough records to exceed this limit without any GC being triggered.  The logic should definitely be changed that if &lt;em&gt;either&lt;/em&gt; &lt;tt&gt;changelog_max_idle_indexes&lt;/tt&gt; &lt;em&gt;or&lt;/em&gt; &lt;tt&gt;changelog_max_idle_time&lt;/tt&gt; is exceeded then the user should be deregistered, since the default values for these are very conservative (2B indexes, 30 days idle), and should be honored if they are explicitly set to lower values.&lt;/p&gt;

&lt;p&gt;Secondly, there is the question if we need better default values for the GC limits?  We might also consider the case of setting the default:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;         mdd-&amp;gt;mdd_changelog_max_idle_indexes = min(CHLOG_MAX_IDLE_INDEXES, total_inodes);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;since it doesn&apos;t make sense to keep 2B changelog records if the MDT only has 100M inodes, and this bounds the upper changelog size by the MDT size (avg. 180 bytes/record, so within the default free bytes per inode).  Above that, it would be faster to just reprocess every file in the filesystem.  This, combined with the previous change may be enough to avoid 99% of the current problems.&lt;/p&gt;

&lt;p&gt;In DDN-2174, I suggested &quot;&lt;tt&gt;(current_index - idle_index) * idle_seconds / 86400 &amp;gt; 4B&lt;/tt&gt;&quot; as a reasonable heuristic for deciding if a user is &quot;too idle&quot;.  That balances cases where there are lots of records being created quickly that need to be consumed (e.g. 4B in 2 days ~= 50000/sec continuously) against a user that is idle for a very long time (e.g. 4B in 30 days ~= 1700/sec), but is possibly too complex for users to understand clearly.  &lt;/p&gt;

&lt;p&gt;I think the default &lt;b&gt;has&lt;/b&gt; to be that Changelog users will be deregistered if they are idle.  That is &lt;em&gt;already&lt;/em&gt; supposed to be the case since patch &lt;a href=&quot;https://review.whamcloud.com/36467&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36467&lt;/a&gt; &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12871&quot; title=&quot;enable changelog garbage collection by default&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12871&quot;&gt;&lt;del&gt;LU-12871&lt;/del&gt;&lt;/a&gt; mdd: enable Changelog garbage collection&lt;/tt&gt;&quot; was landed in 2.14 (was supposed to have been the default back in 2.11).   In the vast majority of cases, the weeks-idle Changelog user isn&apos;t important to anyone (or there would have been a consumer), and having the full Changelog/MDT cause the filesystem to become unusable (see many linked bugs) doesn&apos;t make anyone happy.  &lt;/p&gt;

&lt;p&gt;If there is a need for it, a later patch can add an option &lt;tt&gt;&amp;#45;&amp;#45;gc&amp;#45;disable&lt;/tt&gt; for a specific user if this really is a critical consumer.  I could imagine that audit records on some very restricted systems may prefer to keep very old Changelog records over having a working filesystem, but really they should just make the audit consumer run properly.  It &lt;em&gt;might&lt;/em&gt; separately be useful to have an option like &lt;tt&gt;&amp;#45;&amp;#45;gc&amp;#45;clear&lt;/tt&gt; or similar that continually clears the oldest records (e.g. to the start of the next Changelog file each time) for the oldest user(s) instead of deregistering the user(s), but I&apos;m not sure there is a specific need for that yet.&lt;/p&gt;

</comment>
                            <comment id="312406" author="tappro" created="Thu, 9 Sep 2021 20:52:43 +0000"  >&lt;p&gt;the key question is how to determine that user is idle, that was what I meant. Can there be changelog user that is active for a long time? E.g. Robinhood HSM engine, if it will work with the same registered user more than our &lt;tt&gt;max_idle_time&lt;/tt&gt; then it will be deregistered, because we don&apos;t know if user is idle or is still needed. Changelog user &lt;tt&gt;cur_time&lt;/tt&gt; is set upon registration and just compared with current time to decide if changelog is idle or not. So basically it is not even &lt;tt&gt;idle&lt;/tt&gt; metric but &lt;tt&gt;age&lt;/tt&gt;&lt;/p&gt;</comment>
                            <comment id="312714" author="jhammond" created="Tue, 14 Sep 2021 12:05:04 +0000"  >&lt;p&gt;&amp;gt; Changelog user cur_time is set upon registration and just compared with current time to decide if changelog is idle or not. So basically it is not even idle metric but age&lt;/p&gt;

&lt;p&gt;&lt;tt&gt;cur_time&lt;/tt&gt; is also set on &lt;tt&gt;changelog_clear&lt;/tt&gt;. And an active changelog user should be calling &lt;tt&gt;changelog_clear&lt;/tt&gt; periodically. So I think it&apos;s OK to consider this an idle metric.&lt;/p&gt;</comment>
                            <comment id="314098" author="gerrit" created="Mon, 27 Sep 2021 22:34:18 +0000"  >&lt;p&gt;&quot;Mike Pershin &amp;lt;mpershin@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/45068&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45068&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14699&quot; title=&quot;changelog garbage collection is too lax&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14699&quot;&gt;&lt;del&gt;LU-14699&lt;/del&gt;&lt;/a&gt; mdd: proactive changelog garbage collection&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 892a30f5ae6fc580afc20f3d6fdd5edfcf9e6fc0&lt;/p&gt;</comment>
                            <comment id="319476" author="gerrit" created="Tue, 30 Nov 2021 03:47:47 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/45068/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45068/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14699&quot; title=&quot;changelog garbage collection is too lax&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14699&quot;&gt;&lt;del&gt;LU-14699&lt;/del&gt;&lt;/a&gt; mdd: proactive changelog garbage collection&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: f60b307c5001e1d9035af61d2344af33d3ea0f85&lt;/p&gt;</comment>
                            <comment id="319547" author="pjones" created="Tue, 30 Nov 2021 13:56:07 +0000"  >&lt;p&gt;Landed for 2.15&lt;/p&gt;</comment>
                            <comment id="323171" author="gerrit" created="Wed, 19 Jan 2022 16:59:09 +0000"  >&lt;p&gt;&quot;Etienne AUJAMES &amp;lt;eaujames@ddn.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/46203&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/46203&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14699&quot; title=&quot;changelog garbage collection is too lax&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14699&quot;&gt;&lt;del&gt;LU-14699&lt;/del&gt;&lt;/a&gt; mdd: proactive changelog garbage collection&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: ec383a43b5d236c538bb24e8c22b95e577c74401&lt;/p&gt;</comment>
                            <comment id="323173" author="eaujames" created="Wed, 19 Jan 2022 17:04:18 +0000"  >&lt;p&gt;I have backported the &lt;a href=&quot;https://review.whamcloud.com/46203&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/46203&lt;/a&gt; for testing purpose.&lt;/p&gt;</comment>
                            <comment id="392532" author="nrutman" created="Thu, 9 Nov 2023 19:10:35 +0000"  >&lt;p&gt;I realize this ticket is closed, but this seems an appropriate place to ask:&lt;/p&gt;

&lt;p&gt;Deregistration of an idle consumer is a heavy penalty, requiring a user to re-register and restart their consumer process with a new ID. Wouldn&apos;t it make more sense to mark this consumer internally as &quot;idle&quot; and simply ignore it during the lowest-unconsumed-record check? Then if the consumer does come back to life, it still has access to the (remaining) changelog records (and we remove the &quot;idle&quot; flag). This has less impact on users with an intermittent consumer. Idle consumers can be reported in changelog_users. This would also address @Mikhail_Pershin&apos;s concern about evicting an apparently idle consumer on an idle system.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="57176">LU-12871</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="64300">LU-14688</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="57571">LU-13055</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="63860">LU-14626</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="78959">LU-17290</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="59905">LU-13772</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="68523">LU-15524</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01v6f:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>