<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:20:35 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8792] Interop - master&lt;-&gt;2.8 :sanity-hsm test_107: hung while umount MDT</title>
                <link>https://jira.whamcloud.com/browse/LU-8792</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Saurabh Tandan &amp;lt;saurabh.tandan@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/5d28dbcc-a076-11e6-8761-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/5d28dbcc-a076-11e6-8761-5254006e85c2&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_107 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;test failed to respond and timed out
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;OST console:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;23:51:00:Lustre: DEBUG MARKER: == sanity-hsm test 107: Copytool re-register after MDS restart ======================================= 22:50:03 (1477954203)
23:51:00:LustreError: 11-0: lustre-MDT0000-lwp-OST0001: operation obd_ping to node 10.9.5.239@tcp failed: rc = -107
23:51:00:Lustre: lustre-MDT0000-lwp-OST0001: Connection to lustre-MDT0000 (at 10.9.5.239@tcp) was lost; in progress operations using this service will wait for recovery to complete

23:51:01:automount     D 0000000000000000     0  3827      1 0x00000080
23:51:01: ffff88003e827be8 0000000000000082 00000000ffffffff 000023fcaf1a2ab5
23:51:01: ffff880037f78070 ffff8800466a4bb0 000000000081ce38 ffffffffa7e85d92
23:51:01: 000000001cc8adaa 0000000100d02944 ffff88005a82bad8 ffff88003e827fd8
23:51:01:Call Trace:
23:51:01: [&amp;lt;ffffffff811279c0&amp;gt;] ? sync_page_killable+0x0/0x40
23:51:01: [&amp;lt;ffffffff815399b3&amp;gt;] io_schedule+0x73/0xc0
23:51:01: [&amp;lt;ffffffff811279ad&amp;gt;] sync_page+0x3d/0x50
23:51:01: [&amp;lt;ffffffff811279ce&amp;gt;] sync_page_killable+0xe/0x40
23:51:01: [&amp;lt;ffffffff8153a24a&amp;gt;] __wait_on_bit_lock+0x5a/0xc0
23:51:01: [&amp;lt;ffffffff811278d7&amp;gt;] __lock_page_killable+0x67/0x70
23:51:01: [&amp;lt;ffffffff810a14e0&amp;gt;] ? wake_bit_function+0x0/0x50
23:51:01: [&amp;lt;ffffffff8112777e&amp;gt;] ? find_get_page+0x1e/0xa0
23:51:01: [&amp;lt;ffffffff81129604&amp;gt;] generic_file_aio_read+0x4b4/0x700
23:51:01: [&amp;lt;ffffffff81191e4a&amp;gt;] do_sync_read+0xfa/0x140
23:51:01: [&amp;lt;ffffffff810a1460&amp;gt;] ? autoremove_wake_function+0x0/0x40
23:51:01: [&amp;lt;ffffffff81159b85&amp;gt;] ? do_mmap_pgoff+0x335/0x380
23:51:01: [&amp;lt;ffffffff81232036&amp;gt;] ? security_file_permission+0x16/0x20
23:51:01: [&amp;lt;ffffffff81192745&amp;gt;] vfs_read+0xb5/0x1a0
23:51:01: [&amp;lt;ffffffff8119351f&amp;gt;] ? fget_light_pos+0x3f/0x50
23:51:01: [&amp;lt;ffffffff81192a91&amp;gt;] sys_read+0x51/0xb0
23:51:01: [&amp;lt;ffffffff810e8c7e&amp;gt;] ? __audit_syscall_exit+0x25e/0x290
23:51:01: [&amp;lt;ffffffff8100b0d2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;MDS console:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;23:17:51:LustreError: 17770:0:(client.c:1133:ptlrpc_import_delay_req()) Skipped 9 previous similar messages
23:17:51:Lustre: lustre-MDT0000: Not available for connect from 10.9.5.241@tcp (stopping)
23:17:51:Lustre: Skipped 1130 previous similar messages
23:17:51:LustreError: 0-0: Forced cleanup waiting for mdt-lustre-MDT0000_UUID namespace with 1 resources in use, (rc=-110)
23:17:51:LustreError: Skipped 102 previous similar messages
23:17:51:LustreError: 17786:0:(qsd_reint.c:56:qsd_reint_completion()) lustre-MDT0000: failed to enqueue global quota lock, glb fid:[0x200000006:0x10000:0x0], rc:-5
23:17:51:LustreError: 17786:0:(qsd_reint.c:56:qsd_reint_completion()) Skipped 9 previous similar messages
23:17:51:LustreError: 17822:0:(client.c:1133:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff8800444f73c0 x1549736680098240/t0(0) o101-&amp;gt;lustre-MDT0000-lwp-MDT0000@0@lo:23/10 lens 456/496 e 0 to 0 dl 0 ref 2 fl Rpc:/0/ffffffff rc 0/-1
23:17:51:LustreError: 17822:0:(client.c:1133:ptlrpc_import_delay_req()) Skipped 17 previous similar messages
23:17:51:LustreError: 17836:0:(qsd_reint.c:56:qsd_reint_completion()) lustre-MDT0000: failed to enqueue global quota lock, glb fid:[0x200000006:0x10000:0x0], rc:-5
23:17:51:LustreError: 17836:0:(qsd_reint.c:56:qsd_reint_completion()) Skipped 17 previous similar messages
23:17:51:Lustre: lustre-MDT0000: Not available for connect from 10.9.5.242@tcp (stopping)
23:17:51:Lustre: Skipped 1321 previous similar messages
23:17:51:LustreError: 0-0: Forced cleanup waiting for mdt-lustre-MDT0000_UUID namespace with 1 resources in use, (rc=-110)
23:50:37:********** Timeout by autotest system **********
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Client Console:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;23:50:57:[10301.531931] Lustre: DEBUG MARKER: == sanity-hsm test 107: Copytool re-register after MDS restart ======================================= 22:50:03 (1477954203)
23:50:57:[10311.388026] LustreError: 11-0: lustre-MDT0000-mdc-ffff880007dc1800: operation obd_ping to node 10.9.5.239@tcp failed: rc = -107
23:50:57:[10311.396253] Lustre: lustre-MDT0000-mdc-ffff880007dc1800: Connection to lustre-MDT0000 (at 10.9.5.239@tcp) was lost; in progress operations using this service will wait for recovery to complete

23:50:57:[13944.654013] auditd          D ffff88007bb8fd90     0   490      1 0x00000000
23:50:57:[13944.654013]  ffff88007bb8fc30 0000000000000086 ffff880036707300 ffff88007bb8ffd8
23:50:57:[13944.654013]  ffff88007bb8ffd8 ffff88007bb8ffd8 ffff880036707300 ffff88007fc167c0
23:50:57:[13944.654013]  0000000000000000 7fffffffffffffff ffffffff81168d10 ffff88007bb8fd90
23:50:57:[13944.654013] Call Trace:
23:50:57:[13944.654013]  [&amp;lt;ffffffff81168d10&amp;gt;] ? wait_on_page_read+0x60/0x60
23:50:57:[13944.654013]  [&amp;lt;ffffffff8163bb39&amp;gt;] schedule+0x29/0x70
23:50:57:[13944.654013]  [&amp;lt;ffffffff81639829&amp;gt;] schedule_timeout+0x209/0x2d0
23:50:57:[13944.654013]  [&amp;lt;ffffffff81058aaf&amp;gt;] ? kvm_clock_get_cycles+0x1f/0x30
23:50:57:[13944.654013]  [&amp;lt;ffffffff81168d10&amp;gt;] ? wait_on_page_read+0x60/0x60
23:50:57:[13944.654013]  [&amp;lt;ffffffff8163b16e&amp;gt;] io_schedule_timeout+0xae/0x130
23:50:57:[13944.654013]  [&amp;lt;ffffffff8163b208&amp;gt;] io_schedule+0x18/0x20
23:50:57:[13944.654013]  [&amp;lt;ffffffff81168d1e&amp;gt;] sleep_on_page+0xe/0x20
23:50:57:[13944.654013]  [&amp;lt;ffffffff816399b0&amp;gt;] __wait_on_bit+0x60/0x90
23:50:57:[13944.654013]  [&amp;lt;ffffffff81168aa6&amp;gt;] wait_on_page_bit+0x86/0xb0
23:50:57:[13944.654013]  [&amp;lt;ffffffff810a6c00&amp;gt;] ? wake_atomic_t_function+0x40/0x40
23:50:57:[13944.654013]  [&amp;lt;ffffffff81168be1&amp;gt;] filemap_fdatawait_range+0x111/0x1b0
23:50:57:[13944.654013]  [&amp;lt;ffffffff81175d3e&amp;gt;] ? do_writepages+0x1e/0x40
23:50:57:[13944.654013]  [&amp;lt;ffffffff8116aad5&amp;gt;] ? __filemap_fdatawrite_range+0x65/0x80
23:50:57:[13944.654013]  [&amp;lt;ffffffff8116abff&amp;gt;] filemap_write_and_wait_range+0x3f/0x70
23:50:57:[13944.654013]  [&amp;lt;ffffffffa01807aa&amp;gt;] ext4_sync_file+0xba/0x320 [ext4]
23:50:57:[13944.654013]  [&amp;lt;ffffffff81210185&amp;gt;] do_fsync+0x65/0xa0
23:50:57:[13944.654013]  [&amp;lt;ffffffff81210450&amp;gt;] SyS_fsync+0x10/0x20
23:50:57:[13944.654013]  [&amp;lt;ffffffff81646b49&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Interop - 2.8.0 EL6.7 Server/ EL7.2 Client&lt;br/&gt;
Server - b2_8_fe build# 12, RHEL6.7&lt;br/&gt;
Client - master ,build# 3468 RHEL 7.2</environment>
        <key id="41253">LU-8792</key>
            <summary>Interop - master&lt;-&gt;2.8 :sanity-hsm test_107: hung while umount MDT</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="ys">Yang Sheng</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Wed, 2 Nov 2016 20:27:40 +0000</created>
                <updated>Wed, 16 Jan 2019 06:41:51 +0000</updated>
                            <resolved>Wed, 16 Jan 2019 06:41:51 +0000</resolved>
                                    <version>Lustre 2.9.0</version>
                    <version>Lustre 2.10.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="172377" author="pjones" created="Fri, 4 Nov 2016 17:43:22 +0000"  >&lt;p&gt;Yang Sheng&lt;/p&gt;

&lt;p&gt;Could you please advise on this one?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="172909" author="ys" created="Wed, 9 Nov 2016 11:16:35 +0000"  >&lt;p&gt;The umount hung on:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;umount        S 0000000000000000     0 17653  17652 0x00000080
 ffff88004f20f9a8 0000000000000082 ffffffffa0853748 0000000000000000
 ffff88004f20fa08 000000004f20c000 ffffffffa08b62b0 ffff880046461b18
 000000564f20f948 0000000100d03478 ffff88004f3bdad8 ffff88004f20ffd8
Call Trace:
 [&amp;lt;ffffffffa0483e1e&amp;gt;] ? cfs_hash_spin_lock+0xe/0x10 [libcfs]
 [&amp;lt;ffffffff8153a042&amp;gt;] schedule_timeout+0x192/0x2e0
 [&amp;lt;ffffffff81089be0&amp;gt;] ? process_timeout+0x0/0x10
 [&amp;lt;ffffffffa075f1e0&amp;gt;] __ldlm_namespace_free+0x1c0/0x560 [ptlrpc]
 [&amp;lt;ffffffff810672b0&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa075f5ef&amp;gt;] ldlm_namespace_free_prior+0x6f/0x220 [ptlrpc]
 [&amp;lt;ffffffffa0e045d2&amp;gt;] mdt_device_fini+0x6a2/0x12e0 [mdt]
 [&amp;lt;ffffffffa055eb06&amp;gt;] ? class_disconnect_exports+0x116/0x2f0 [obdclass]
 [&amp;lt;ffffffffa0577332&amp;gt;] class_cleanup+0x572/0xd20 [obdclass]
 [&amp;lt;ffffffffa055a386&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
 [&amp;lt;ffffffffa0579646&amp;gt;] class_process_config+0x1b66/0x24c0 [obdclass]
 [&amp;lt;ffffffffa047dcf1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffff81178a2c&amp;gt;] ? __kmalloc+0x21c/0x230
 [&amp;lt;ffffffffa057a45f&amp;gt;] class_manual_cleanup+0x4bf/0xc90 [obdclass]
 [&amp;lt;ffffffffa055a386&amp;gt;] ? class_name2dev+0x56/0xe0 [obdclass]
 [&amp;lt;ffffffffa05aba1c&amp;gt;] server_put_super+0x8bc/0xcd0 [obdclass]
 [&amp;lt;ffffffff811944bb&amp;gt;] generic_shutdown_super+0x5b/0xe0
 [&amp;lt;ffffffff811945a6&amp;gt;] kill_anon_super+0x16/0x60
 [&amp;lt;ffffffffa057d646&amp;gt;] lustre_kill_super+0x36/0x60 [obdclass]
 [&amp;lt;ffffffff81194d47&amp;gt;] deactivate_super+0x57/0x80
 [&amp;lt;ffffffff811b4d3f&amp;gt;] mntput_no_expire+0xbf/0x110
 [&amp;lt;ffffffff811b588b&amp;gt;] sys_umount+0x7b/0x3a0
 [&amp;lt;ffffffff8100b0d2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;and&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LustreError: 0-0: Forced cleanup waiting for mdt-lustre-MDT0000_UUID namespace with 1 resources in use, (rc=-110)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;So looks like some lock still not released. But i cannot find any clue to prove that.&lt;/p&gt;

&lt;p&gt;Then from MDS dmesg log:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: /usr/sbin/lctl mark == sanity-hsm test 107: Copytool re-register after MDS restart ======================================= 22:50:03 \(1477954203\)
Lustre: DEBUG MARKER: == sanity-hsm test 107: Copytool re-register after MDS restart ======================================= 22:50:03 (1477954203)
Lustre: DEBUG MARKER: /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000405:0x21a:0x0&apos;.*action=&apos;ARCHIVE&apos;/ {print $13}&apos; | cut -f2 -d=
Lustre: DEBUG MARKER: /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000405:0x21a:0x0&apos;.*action=&apos;ARCHIVE&apos;/ {print $13}&apos; | cut -f2 -d=
Lustre: DEBUG MARKER: grep -c /mnt/lustre-mds1&apos; &apos; /proc/mounts
Lustre: DEBUG MARKER: umount -d /mnt/lustre-mds1
Lustre: Failing over lustre-MDT0000
LustreError: 17653:0:(ldlm_resource.c:887:ldlm_resource_complain()) mdt-lustre-MDT0000_UUID: namespace resource [0x200000404:0xb24e:0x0].0x0 (ffff8800453f3080) refcount nonzero (1) after lock cleanup; forcing cleanup.
LustreError: 17653:0:(ldlm_resource.c:1502:ldlm_resource_dump()) --- Resource: [0x200000404:0xb24e:0x0].0x0 (ffff8800453f3080) refcount = 2
Lustre: lustre-MDT0000: Not available for connect from 10.9.5.240@tcp (stopping)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The refcount of resource is nonzero but without any locks dump. Looks like same as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2067&quot; title=&quot;ldlm_resource_complain()) Namespace MGC resource refcount nonzero after lock cleanup&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2067&quot;&gt;&lt;del&gt;LU-2067&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;
</comment>
                            <comment id="196990" author="casperjx" created="Wed, 24 May 2017 22:58:15 +0000"  >&lt;p&gt;2.9.57, b3575:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/2209839b-42d4-4fe6-91f1-96f9ce3c5a69&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/2209839b-42d4-4fe6-91f1-96f9ce3c5a69&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="240078" author="ys" created="Wed, 16 Jan 2019 06:41:51 +0000"  >&lt;p&gt;Please feel free to reopen it.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="16203">LU-2067</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzyu87:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>