<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:41:26 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11158] PFL component instantiation is not replayed properly</title>
                <link>https://jira.whamcloud.com/browse/LU-11158</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While investigating &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10961&quot; title=&quot;Clients hang after failovers. LustreError: 223668:0:(file.c:4213:ll_inode_revalidate_fini()) soaked: revalidate FID [0x200000007:0x1:0x0] error: rc = -4&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10961&quot;&gt;&lt;del&gt;LU-10961&lt;/del&gt;&lt;/a&gt; I have found that component instantiation is not replayed. Test showing the problem:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
test_132a() {
	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.9.90) ] &amp;amp;&amp;amp;
		skip &lt;span class=&quot;code-quote&quot;&gt;&quot;Do not support PFL files before 2.10&quot;&lt;/span&gt;

	$LFS setstripe -E 1M -c 1 -E EOF -c 2 $DIR/$tfile
	replay_barrier $SINGLEMDS
	# write over the first component size cause next component instantiation
	dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/dev/urandom of=$DIR/$tfile bs=1M count=1 seek=1 ||
		error &lt;span class=&quot;code-quote&quot;&gt;&quot;dd to $DIR/$tfile failed&quot;&lt;/span&gt;

	cksum=$(md5sum $DIR/$tfile | awk &lt;span class=&quot;code-quote&quot;&gt;&apos;{print $1}&apos;&lt;/span&gt;)
	$LFS getstripe -I2 $DIR/$tfile | grep -q lmm_objects ||
		error &lt;span class=&quot;code-quote&quot;&gt;&quot;Component #1 was not instantiated&quot;&lt;/span&gt;

	fail $SINGLEMDS

	cksum2=$(md5sum $DIR/$tfile | awk &lt;span class=&quot;code-quote&quot;&gt;&apos;{print $1}&apos;&lt;/span&gt;)
	&lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; [ $cksum != $cksum2 ] ; then
		error_noexit &lt;span class=&quot;code-quote&quot;&gt;&quot;New checksum $cksum2 does not match original $cksum&quot;&lt;/span&gt;
	fi
	$LFS getstripe -I2 $DIR/$tfile | grep -q lmm_objects ||
		error &lt;span class=&quot;code-quote&quot;&gt;&quot;Component #1 instantiation was not replayed&quot;&lt;/span&gt;
}
run_test 132a &lt;span class=&quot;code-quote&quot;&gt;&quot;PFL &lt;span class=&quot;code-keyword&quot;&gt;new&lt;/span&gt; component instantiate replay&quot;&lt;/span&gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The result is double-checked here: by comparing checksums and by checking that the next component has lmm_objects assigned. Both checks fail on master.&lt;/p&gt;</description>
                <environment></environment>
        <key id="52752">LU-11158</key>
            <summary>PFL component instantiation is not replayed properly</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="tappro">Mikhail Pershin</reporter>
                        <labels>
                    </labels>
                <created>Wed, 18 Jul 2018 23:39:56 +0000</created>
                <updated>Wed, 27 Feb 2019 14:06:34 +0000</updated>
                            <resolved>Mon, 29 Oct 2018 16:14:52 +0000</resolved>
                                    <version>Lustre 2.10.0</version>
                    <version>Lustre 2.11.0</version>
                    <version>Lustre 2.12.0</version>
                                    <fixVersion>Lustre 2.12.0</fixVersion>
                    <fixVersion>Lustre 2.10.7</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="230517" author="adilger" created="Thu, 19 Jul 2018 03:47:10 +0000"  >&lt;p&gt;It sounds like the component instantiation RPC needs to be assigned a transno and saved on the client for replay.&lt;/p&gt;</comment>
                            <comment id="230518" author="adilger" created="Thu, 19 Jul 2018 04:00:24 +0000"  >&lt;p&gt;Mike, I didn&apos;t find &lt;tt&gt;test_132a&lt;/tt&gt; in any test script.  Is this added in a patch, or a test you wrote for reproducing this problem?&lt;/p&gt;</comment>
                            <comment id="230520" author="tappro" created="Thu, 19 Jul 2018 05:00:01 +0000"  >&lt;p&gt;this is just a reproducer I&apos;ve added to replay-single.sh locally, I didn&apos;t push it into gerrit.&lt;/p&gt;</comment>
                            <comment id="230521" author="tappro" created="Thu, 19 Jul 2018 06:01:22 +0000"  >&lt;p&gt;I suspect that replay itself has been done as intended but replay data is wrong and new layout is being rewritten with old data. It seems that replay has old layout in replay data instead of the new one. At least I see in logs that layout was changed on client to the older generation.&lt;/p&gt;</comment>
                            <comment id="230522" author="adilger" created="Thu, 19 Jul 2018 07:01:15 +0000"  >&lt;p&gt;It seems likely that only the original open replay RPC is being sent, and it contains only the first component of the layout (which is always initialized at open). The write intent RPC that is causing the later components to be initialized is not being replayed at all.&lt;/p&gt;

&lt;p&gt;This write intent RPC may be sent weeks or years after the initial open, so it doesn&apos;t make sense to modify the layout stored with the initial open (which may not even exist on this same client). We need to replay each of the RPCs that caused the new component to be initialized, whichever client sent it.  This is similar to one client getting the open replay transno even though many clients tried to create the same file. &lt;/p&gt;</comment>
                            <comment id="230540" author="tappro" created="Thu, 19 Jul 2018 14:13:32 +0000"  >&lt;p&gt;As far as I can see, the write intent has a transno and is replayed. There is also code to support replay in MDT/MDD/LOD, but it seems to be broken somewhere in the middle. I am checking the whole code path right now.&lt;/p&gt;</comment>
                            <comment id="230541" author="bobijam" created="Thu, 19 Jul 2018 14:15:45 +0000"  >&lt;p&gt;I found that mdt_lvbo_fill() complains that the lvblen (240) is too small to hold the EA (size 264). It looks like mdt_intent_layout() hasn&apos;t set RMF_DLM_LVB big enough.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeHeader panelHeader&quot; style=&quot;border-bottom-width: 1px;&quot;&gt;&lt;b&gt;mdt_intent_layout()&lt;/b&gt;&lt;/div&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (mdt_object_exists(obj) &amp;amp;&amp;amp; !mdt_object_remote(obj)) {
                /* &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; layout is going to be changed don&apos;t use the current EA
                 * size but the maximum one. That buffer will be shrinked
                 * to the actual size in req_capsule_shrink() before reply.
                 */
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (layout.mlc_opc == MD_LAYOUT_WRITE) {
                        layout_size = info-&amp;gt;mti_mdt-&amp;gt;mdt_max_mdsize;
                } &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt; {
                        layout_size = mdt_attr_get_eabuf_size(info, obj);
                        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (layout_size &amp;lt; 0)
                                GOTO(out_obj, rc = layout_size);

                        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (layout_size &amp;gt; info-&amp;gt;mti_mdt-&amp;gt;mdt_max_mdsize)
                                info-&amp;gt;mti_mdt-&amp;gt;mdt_max_mdsize = layout_size;
                }
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;So I tried to change the default mdt_max_mdsize to a bigger size, and the test passed.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h
index 7999816676..20d13cb4f6 100644
--- a/lustre/include/uapi/linux/lustre/lustre_idl.h
+++ b/lustre/include/uapi/linux/lustre/lustre_idl.h
@@ -1117,7 +1117,7 @@ struct lov_mds_md_v1 {&#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-comment&quot;&gt;/* LOV EA mds/wire data (little-endian) */&lt;/span&gt;
&#160; &#160; &#160; &#160; struct lov_ost_data_v1 lmm_objects[0]; &lt;span class=&quot;code-comment&quot;&gt;/* per-stripe data */&lt;/span&gt;
 };
 
-#define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
+#define MAX_MD_SIZE (sizeof(struct lov_comp_md_v1) + 4 * &#160; &#160; &#160; &#160; &#160; &#160; &#160; \
+ &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; (sizeof(struct lov_comp_md_entry_v1) +&#160; &#160; &#160; &#160; &#160; \
+&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; (sizeof(struct lov_mds_md) + 4 * &#160; &#160; &#160; &#160; &#160; &#160; &#160; \
+ &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; sizeof(struct lov_ost_data))))
 #define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
 
 /* This is the &lt;span class=&quot;code-keyword&quot;&gt;default&lt;/span&gt; MDT reply size allocated, should the striping be bigger, &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="230544" author="bobijam" created="Thu, 19 Jul 2018 14:43:46 +0000"  >&lt;p&gt;I don&apos;t know whether it is right or not that mdt_lvbo_fill() returns 0 when the LVBO buffer is smaller than the necessary EA size.&lt;/p&gt;</comment>
                            <comment id="230567" author="jinshan" created="Thu, 19 Jul 2018 17:32:14 +0000"  >&lt;blockquote&gt;

&lt;p&gt;This write intent RPC may be sent weeks or years after the initial open, so it doesn&apos;t make sense to modify the layout stored with the initial open (which may not even exist on this same client). We need to replay each of the RPCs that caused the new component to be initialized, whichever client sent it. This is similar to one client getting the open replay transno even though many clients tried to create the same file.&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;If an open RPC has been committed, should we just ignore the layout in the replay RPC? In other words, it will just restore the open context on the MDS side.&lt;/p&gt;

&lt;p&gt;RPCs that modify layout components are just regular REINT RPCs. As long as they are committed, they will be gone, no need to replay them.&lt;/p&gt;</comment>
                            <comment id="230569" author="jinshan" created="Thu, 19 Jul 2018 17:37:17 +0000"  >&lt;p&gt;bobi - there was some good discussion about this problem before, but I don&apos;t remember the ticket number. We should probably pick it up and implement the complete solution I proposed there. The real problem here is that &apos;mdt_max_mdsize&apos; keeps increasing to the current max mdsize it sees on the MDS (IIRC), which causes problems for layout writes when the system starts.&lt;/p&gt;</comment>
                            <comment id="230586" author="tappro" created="Thu, 19 Jul 2018 19:15:19 +0000"  >&lt;p&gt;IIRC, the mdt_lvbo_fill() may skip the EA getting just because something like that - &quot;we can do nothing here, let&apos;s report new EA size back and there will be separate getxattr RPC&quot;. That is not working with RPCs to be replayed though.&lt;/p&gt;</comment>
                            <comment id="230587" author="tappro" created="Thu, 19 Jul 2018 19:17:03 +0000"  >&lt;p&gt;on other hand I wonder why client can&apos;t supply correct EA size when updating layout? It knows the size, doesn&apos;t it? I mean reply buffer on client side can be allocated with proper size.&lt;/p&gt;</comment>
                            <comment id="230597" author="jinshan" created="Thu, 19 Jul 2018 20:20:12 +0000"  >&lt;p&gt;Yes, there &lt;em&gt;usually&lt;/em&gt;&#160;exists layout intent in file&apos;s layout, but there also exist cases that the file only has partially layout defined.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;The philosophy behind the design is that the MDS should decide what layout it will allocate and how many components it should instantiate, so client technically doesn&apos;t know the actual EA size. Does this make sense to you?&lt;/p&gt;</comment>
                            <comment id="230600" author="tappro" created="Thu, 19 Jul 2018 20:45:27 +0000"  >&lt;p&gt;Do you mean, for example, that new component may have defined size but no stripe count, etc. And MDS will complete that and provide final layout. Yes, that makes sense. I think we have here couple options, first, we can try to grow reply buffer for modification cases so layout will fill into it, second option here is quite non-trivial but still - what if MDS will return not whole layout in reply but just new component data? Considering that we have EX lock and new component instantiation doesn&apos;t change earlier components that should work and would require less reply size. Just thoughts, probably I am missing something here.&lt;/p&gt;</comment>
                            <comment id="230603" author="tappro" created="Thu, 19 Jul 2018 20:52:56 +0000"  >&lt;p&gt;Also, speaking about decision on client side - while MDS creates layout, the client still may predict its size quite correctly if number of stripes is known because layout size depends mostly on that, if it is not specified then client may allocate large reply buffer for some amount of stripes and MDS may consider that while creating new component.&lt;/p&gt;

&lt;p&gt;I wasn&apos;t part of the discussion you mentioned; maybe there is a good solution already.&lt;/p&gt;</comment>
                            <comment id="230647" author="gerrit" created="Fri, 20 Jul 2018 16:13:44 +0000"  >&lt;p&gt;Bobi Jam (bobijam@hotmail.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/32847&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/32847&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11158&quot; title=&quot;PFL component instantiation is not replayed properly&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11158&quot;&gt;&lt;del&gt;LU-11158&lt;/del&gt;&lt;/a&gt; mdt: grow lvb buffer to hold layout&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: ef68d2e72f7fad7049594d14a78dda143fc0f736&lt;/p&gt;</comment>
                            <comment id="235772" author="gerrit" created="Mon, 29 Oct 2018 15:58:19 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/32847/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/32847/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11158&quot; title=&quot;PFL component instantiation is not replayed properly&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11158&quot;&gt;&lt;del&gt;LU-11158&lt;/del&gt;&lt;/a&gt; mdt: grow lvb buffer to hold layout&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: e5abcf83c0575b8a79594c1eb9ea727739d91522&lt;/p&gt;</comment>
                            <comment id="235795" author="pjones" created="Mon, 29 Oct 2018 16:14:53 +0000"  >&lt;p&gt;Landed for 2.12&lt;/p&gt;</comment>
                            <comment id="240156" author="gerrit" created="Wed, 16 Jan 2019 17:37:55 +0000"  >&lt;p&gt;Bobi Jam (bobijam@hotmail.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34049&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34049&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11158&quot; title=&quot;PFL component instantiation is not replayed properly&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11158&quot;&gt;&lt;del&gt;LU-11158&lt;/del&gt;&lt;/a&gt; mdt: grow lvb buffer to hold layout&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: ec5ce00e9c1d95a178a9ea5bf6cd2b26e0e28837&lt;/p&gt;</comment>
                            <comment id="242030" author="gerrit" created="Fri, 15 Feb 2019 01:28:38 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/34049/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34049/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11158&quot; title=&quot;PFL component instantiation is not replayed properly&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11158&quot;&gt;&lt;del&gt;LU-11158&lt;/del&gt;&lt;/a&gt; mdt: grow lvb buffer to hold layout&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: a1d1006a5e2bd7ba3dd9096107c456b353a3eeb0&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10120">
                    <name>Blocker</name>
                                            <outwardlinks description="is blocking">
                                        <issuelink>
            <issuekey id="52029">LU-10961</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="50877">LU-10686</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzze7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>