<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:56:40 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6039] sanity-hsm test 228: INFO: task lhsmtool_posix:26565 blocked for more than 120 seconds.</title>
                <link>https://jira.whamcloud.com/browse/LU-6039</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;cmd :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt; # PDSH=&quot;pdsh -R ssh -S -w&quot; ONLY=228 MDS_MOUNT_OPTS=&quot;-o rw,user_xattr&quot; OST_MOUNT_OPTS=&quot;-o user_xattr&quot; AGTCOUNT=1 agt1_HOST=vinayak lustre/tests/sanity-hsm.sh
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Dec 17 11:37:58 vinayak_centos kernel: INFO: task lhsmtool_posix:26565 blocked for more than 120 seconds.
Dec 17 11:37:58 vinayak_centos kernel:      Not tainted 2.6.32.431.20.3.el6_lustre #1
Dec 17 11:37:58 vinayak_centos kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Dec 17 11:37:58 vinayak_centos kernel: lhsmtool_posi D 0000000000000001     0 26565      1 0x00000080
Dec 17 11:37:58 vinayak_centos kernel: ffff880000f4ba98 0000000000000082 0000000000000000 ffff88000df84000
Dec 17 11:37:58 vinayak_centos kernel: ffff880000f4ba98 ffffffff8128d1f6 0000000200000010 0000000affffffff
Dec 17 11:37:58 vinayak_centos kernel: ffff88002d703098 ffff880000f4bfd8 000000000000fbc8 ffff88002d703098
Dec 17 11:37:58 vinayak_centos kernel: Call Trace:
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffff8128d1f6&amp;gt;] ? vsnprintf+0x336/0x5e0
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffff8152a40e&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffff8152a2ab&amp;gt;] mutex_lock+0x2b/0x50
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa0f3219a&amp;gt;] ll_layout_refresh+0x1da/0xcd0 [lustre]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa07c4ab3&amp;gt;] ? ldlm_lock_add_to_lru+0x43/0x120 [ptlrpc]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa0f5cd80&amp;gt;] ? ll_md_blocking_ast+0x0/0x7f0 [lustre]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa07e87a0&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa0606011&amp;gt;] ? cl_io_slice_add+0xc1/0x190 [obdclass]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa0f82d70&amp;gt;] vvp_io_init+0x340/0x490 [lustre]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa0605088&amp;gt;] cl_io_init0+0x88/0x150 [obdclass]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa05fe505&amp;gt;] ? cl_env_get+0x195/0x350 [obdclass]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa0608004&amp;gt;] cl_io_init+0x64/0xe0 [obdclass]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa0f78ab1&amp;gt;] cl_glimpse_size0+0x91/0x1d0 [lustre]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffffa0f27966&amp;gt;] ll_getattr+0x2e6/0x800 [lustre]
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffff8119b442&amp;gt;] ? user_path_at+0x62/0xa0
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffff8118e851&amp;gt;] vfs_getattr+0x51/0x80
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffff8118e8e4&amp;gt;] vfs_fstatat+0x64/0xa0
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffff8118e944&amp;gt;] sys_newfstatat+0x24/0x50
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffff810e1e87&amp;gt;] ? audit_syscall_entry+0x1d7/0x200
Dec 17 11:37:58 vinayak_centos kernel: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>single node setup on local vm</environment>
        <key id="27950">LU-6039</key>
            <summary>sanity-hsm test 228: INFO: task lhsmtool_posix:26565 blocked for more than 120 seconds.</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="vinayak_clogeny">Vinayak Hariharmath</reporter>
                        <labels>
                            <label>HSM</label>
                    </labels>
                <created>Wed, 17 Dec 2014 06:47:39 +0000</created>
                <updated>Thu, 18 Dec 2014 23:18:01 +0000</updated>
                            <resolved>Thu, 18 Dec 2014 23:18:01 +0000</resolved>
                                    <version>Lustre 2.5.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="101839" author="jay" created="Wed, 17 Dec 2014 18:26:48 +0000"  >&lt;p&gt;If you can reproduce this problem, please use the following command:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;echo t &amp;gt; /proc/sysrq-trigger
dmesg &amp;gt; log.txt
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;and then attach the log.txt file here. I believe there is another process holding the mutex and waiting for response from the MDT.&lt;/p&gt;</comment>
                            <comment id="101842" author="adilger" created="Wed, 17 Dec 2014 18:35:36 +0000"  >&lt;p&gt;This bug is filed against 2.5.0. Are you really running that version, or is it 2.5.3 (which you probably should use, or even the tip of b2_5)?&lt;/p&gt;</comment>
                            <comment id="101925" author="vinayak_clogeny" created="Thu, 18 Dec 2014 06:50:32 +0000"  >&lt;p&gt;I ran it on master. Wrongly updated affect/versions.&lt;/p&gt;

&lt;p&gt;The test_228 is doing hsm_release 2 times. I commented out 2nd release statement and test case got passed. &lt;/p&gt;

&lt;p&gt;I mimicked the steps manually  &lt;br/&gt;
1. cd /mnt/lustre&lt;br/&gt;
2. created file dd if=/dev/zero of=test/tfile  bs=1k count=1k&lt;br/&gt;
3. lfs hsm_archive --archive=1 test/tfile&lt;br/&gt;
4. lfs hsm_release test/tfile&lt;br/&gt;
5. cp --sparse=auto test/tfile test/tfile.2&lt;br/&gt;
6. lfs hsm_release test/tfile&lt;br/&gt;
7. mkdir test/tdir&lt;br/&gt;
8. tar cf - --sparse test/tfile | tar xvf - -C test/tdir   ==&amp;gt; process hung on layout lock&lt;/p&gt;

&lt;p&gt;if we skip step 6, then everything goes fine.&lt;/p&gt;

&lt;p&gt;Even I checked with other operation (copy) instead of step 8&lt;br/&gt;
cp --sparse=auto test/tfile test/tdir/tfile.2    ==&amp;gt; this also got hung&lt;/p&gt;

&lt;p&gt;After this I got few of the questions in my mind&lt;br/&gt;
1. why file is released twice ?&lt;br/&gt;
2. Even if the file is released twice, why the operation getting hung ? &lt;br/&gt;
releasing the archived file for &quot;n&quot; number of times should not affect.&lt;/p&gt;

&lt;p&gt;Jinshan Xiong : Yes, you are right. layout lock has been held by some one and getting stuck there. I will update the logs.&lt;/p&gt;</comment>
                            <comment id="101926" author="vinayak_clogeny" created="Thu, 18 Dec 2014 07:03:31 +0000"  >&lt;p&gt;logs file attached&lt;/p&gt;</comment>
                            <comment id="101928" author="vinayak_clogeny" created="Thu, 18 Dec 2014 08:00:29 +0000"  >&lt;p&gt;Got answer for &lt;br/&gt;
1. why file is released twice ?&lt;br/&gt;
To perform step 5 ( cp --sparse=auto test/tfile test/tfile.2), file is restored first then copying is done.&lt;br/&gt;
After this step hsm_state will be reverted to &quot;exists&quot; from &quot;released&quot;. Thats why again the file is released for 2nd time.&lt;/p&gt;

&lt;p&gt;Below record proves that&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lfs hsm_state test/tfile 
test/tfile: (0x00000009) exists archived, archive_id:1

# lfs hsm_release test/tfile 

# lfs hsm_state test/tfile 
test/tfile: (0x0000000d) released exists archived, archive_id:1

# cp test/tfile test/tfile.2
lhsmtool_posix[13264]: copytool fs=lustre archive#=1 item_count=1
lhsmtool_posix[13264]: waiting for message from kernel
lhsmtool_posix[13296]: &apos;[0x200000400:0x2:0x0]&apos; action RESTORE reclen 72, cookie=0x549282f9
lhsmtool_posix[13296]: processing file &apos;test/tfile&apos;
lhsmtool_posix[13296]: reading stripe rules from &apos;/tmp/hsm//0002/0000/0400/0000/0002/0000/0x200000400:0x2:0x0.lov&apos; for &apos;/tmp/hsm//0002/0000/0400/0000/0002/0000/0x200000400:0x2:0x0&apos;
lhsmtool_posix[13296]: restoring data from &apos;/tmp/hsm//0002/0000/0400/0000/0002/0000/0x200000400:0x2:0x0&apos; to &apos;{VOLATILE}=[0x200000400:0x5:0x0]&apos;
lhsmtool_posix[13296]: going to copy data from &apos;/tmp/hsm//0002/0000/0400/0000/0002/0000/0x200000400:0x2:0x0&apos; to &apos;{VOLATILE}=[0x200000400:0x5:0x0]&apos;
lhsmtool_posix[13296]: data restore from &apos;/tmp/hsm//0002/0000/0400/0000/0002/0000/0x200000400:0x2:0x0&apos; to &apos;{VOLATILE}=[0x200000400:0x5:0x0]&apos; done
lhsmtool_posix[13296]: Action completed, notifying coordinator cookie=0x549282f9, FID=[0x200000400:0x2:0x0], hp_flags=0 err=0
lhsmtool_posix[13296]: llapi_hsm_action_end() on &apos;/mnt/lustre/.lustre/fid/0x200000400:0x2:0x0&apos; ok (rc=0)

# lfs hsm_state test/tfile 
test/tfile: (0x00000009) exists archived, archive_id:1

# lfs hsm_release test/tfile

# lfs hsm_state test/tfile 
test/tfile: (0x0000000d) released exists archived, archive_id:1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="101932" author="vinayak_clogeny" created="Thu, 18 Dec 2014 09:34:19 +0000"  >&lt;p&gt;is it related to &lt;br/&gt;
&lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-4727&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-4727&lt;/a&gt; ?&lt;/p&gt;</comment>
                            <comment id="102013" author="jay" created="Thu, 18 Dec 2014 23:17:45 +0000"  >&lt;p&gt;It looks like the same issue of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4727&quot; title=&quot;Lhsmtool_posix process stuck in ll_layout_refresh() when restoring&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4727&quot;&gt;&lt;del&gt;LU-4727&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="102014" author="jay" created="Thu, 18 Dec 2014 23:18:01 +0000"  >&lt;p&gt;duplication of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4727&quot; title=&quot;Lhsmtool_posix process stuck in ll_layout_refresh() when restoring&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4727&quot;&gt;&lt;del&gt;LU-4727&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="16624" name="LU-6039_dmesg" size="675504" author="vinayak_clogeny" created="Thu, 18 Dec 2014 07:03:31 +0000"/>
                    </attachments>
                <subtasks/>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10040" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic</customfieldname>
                        <customfieldvalues>
                                        <label>client</label>
            <label>test</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10030" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic/Theme</customfieldname>
                        <customfieldvalues>
                                        <label>test</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                            <customfield id="customfield_10070" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Project</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10040"><![CDATA[HSM]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzx2of:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>16831</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>