<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:58:20 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6223] HSM recovery needs more tests and fixes</title>
                <link>https://jira.whamcloud.com/browse/LU-6223</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Recent ticket &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5939&quot; title=&quot;Error: trying to overwrite bigger transno&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5939&quot;&gt;&lt;del&gt;LU-5939&lt;/del&gt;&lt;/a&gt; reveals that HSM requests were not participating in recovery at all but that was hidden for all that time. That means there is a lack of tests for HSM recovery cases.&lt;/p&gt;

&lt;p&gt;A simple test which simulates a server failure reveals HSM recovery issues.&lt;br/&gt;
Test:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;test_17() {
	# test needs a running copytool
	copytool_setup

	mkdir -p $DIR/$tdir
	local f=$DIR/$tdir/$tfile
	local fid=$(copy_file /etc/hosts $f)

	replay_barrier $SINGLEMDS
	$LFS hsm_archive $f || error &lt;span class=&quot;code-quote&quot;&gt;&quot;archive of $f failed&quot;&lt;/span&gt;
	fail $SINGLEMDS
	wait_request_state $fid ARCHIVE SUCCEED

	$LFS hsm_release $f || error &lt;span class=&quot;code-quote&quot;&gt;&quot;release of $f failed&quot;&lt;/span&gt;

	replay_barrier $SINGLEMDS
	$LFS hsm_restore $f || error &lt;span class=&quot;code-quote&quot;&gt;&quot;restore of $f failed&quot;&lt;/span&gt;
	fail $SINGLEMDS
	wait_request_state $fid RESTORE SUCCEED

	echo -n &lt;span class=&quot;code-quote&quot;&gt;&quot;Verifying file state: &quot;&lt;/span&gt;
	check_hsm_flags $f &lt;span class=&quot;code-quote&quot;&gt;&quot;0x00000009&quot;&lt;/span&gt;

	diff -q /etc/hosts $f

	[[ $? -eq 0 ]] || error &lt;span class=&quot;code-quote&quot;&gt;&quot;Restored file differs&quot;&lt;/span&gt;

	copytool_cleanup
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Test failed on first server failure:&lt;br/&gt;
LustreError: 3248:0:(mdt_coordinator.c:985:mdt_hsm_cdt_start()) lustre-MDT0000: cannot take the layout locks needed for registered restore: -2&lt;/p&gt;

&lt;p&gt;Logs:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;00000004:00000040:0.0:1423319087.106764:0:26715:0:(mdd_object.c:1599:mdd_object_make_hint()) [0x200002b10:0x6:0x0] eadata (&lt;span class=&quot;code-keyword&quot;&gt;null&lt;/span&gt;) len 0
00000004:00001000:0.0:1423319087.106773:0:26715:0:(lod_object.c:3229:lod_ah_init()) &lt;span class=&quot;code-keyword&quot;&gt;final&lt;/span&gt; striping: # 1 stripes, sz 1048576 from 
00000001:00000002:0.0:1423319087.106779:0:26715:0:(linkea.c:136:linkea_add_buf()) New link_ea name &lt;span class=&quot;code-quote&quot;&gt;&apos;.����:VOLATILE:0000:6B8B4567&apos;&lt;/span&gt; is added
00020000:00020000:0.0:1423319087.106788:0:26715:0:(lod_qos.c:1715:lod_qos_parse_config()) lustre-MDT0000-mdtlov: unrecognized magic 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;That is the reason for the test failure at this stage.&lt;/p&gt;</description>
                <environment></environment>
        <key id="28603">LU-6223</key>
            <summary>HSM recovery needs more tests and fixes</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="tappro">Mikhail Pershin</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Mon, 9 Feb 2015 04:14:17 +0000</created>
                <updated>Fri, 21 Jan 2022 02:35:40 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>17</watches>
                                                                            <comments>
                            <comment id="106905" author="tappro" created="Fri, 13 Feb 2015 05:22:44 +0000"  >&lt;p&gt;Another problem to resolve in the context of this ticket is multiple transactions inside a single request. This should be handled properly.&lt;/p&gt;</comment>
                            <comment id="107926" author="bfaccini" created="Wed, 25 Feb 2015 14:08:32 +0000"  >&lt;p&gt;Hello Mike,&lt;br/&gt;
I am starting to work on this ticket, but it is unclear to me what needs to be specifically addressed here regarding work being done as part of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5939&quot; title=&quot;Error: trying to overwrite bigger transno&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5939&quot;&gt;&lt;del&gt;LU-5939&lt;/del&gt;&lt;/a&gt;/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6244&quot; title=&quot;Handle multiple transactions produced by single request&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6244&quot;&gt;&lt;del&gt;LU-6244&lt;/del&gt;&lt;/a&gt;.&lt;br/&gt;
Is it only to add specific HSM recovery testing (including its multiple transactions in single request specific usage) ?&lt;/p&gt;
</comment>
                            <comment id="108082" author="tappro" created="Thu, 26 Feb 2015 10:50:26 +0000"  >&lt;p&gt;Hi Bruno,&lt;/p&gt;

&lt;p&gt;yes, this is about specific HSM recovery tests, see example above, test_17. It shows that HSM archive/restore can&apos;t survive server failover though they are expected to do that. I expect to make this test pass as the first task in the context of this ticket, then we have to think about other recovery tests for HSM. I think we have to check all HSM modification requests for both replay and resend cases.&lt;/p&gt;</comment>
                            <comment id="108087" author="bfaccini" created="Thu, 26 Feb 2015 12:31:47 +0000"  >&lt;p&gt;Ok, cool and thanks for the clarification!&lt;/p&gt;</comment>
                            <comment id="125061" author="adilger" created="Tue, 25 Aug 2015 18:03:18 +0000"  >&lt;p&gt;Hi Bruno, any progress with this ticket?  As a starting point, could you please submit a patch with the above test_17 to see what is failing and what needs to be fixed.&lt;/p&gt;</comment>
                            <comment id="125505" author="gerrit" created="Fri, 28 Aug 2015 10:09:41 +0000"  >&lt;p&gt;Faccini Bruno (bruno.faccini@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/16125&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16125&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6223&quot; title=&quot;HSM recovery needs more tests and fixes&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6223&quot;&gt;LU-6223&lt;/a&gt; tests: recovery of HSM requests&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 6ba94a1f7ccaf2841daee19cfffd3150104f1d02&lt;/p&gt;</comment>
                            <comment id="230702" author="tappro" created="Sun, 22 Jul 2018 08:16:38 +0000"  >&lt;p&gt;This ticket has stayed open for quite a long time but I don&apos;t see that this problem was resolved in any other way. Could someone who is working on HSM now review it and decide what to do?&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="27659">LU-5939</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="28693">LU-6244</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzx5wn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>17410</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>