<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:38:45 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10851] parallel-scale-nfsv4 hangs on unmount</title>
                <link>https://jira.whamcloud.com/browse/LU-10851</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;parallel-scale-nfsv4 hangs on unmount after all tests have run. In the suite_log, the last thing we see is&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;== parallel-scale-nfsv4 test complete, duration 2088 sec ============================================= 22:07:24 (1521868044)
&#160;
Unmounting NFS clients...
CMD: trevis-8vm1,trevis-8vm2 umount -f /mnt/lustre
&#160;
Unexporting Lustre filesystem...
CMD: trevis-8vm1,trevis-8vm2 chkconfig --list rpcidmapd 2&amp;gt;/dev/null |
&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; &#160;&#160;&#160;&#160;&#160;&#160; grep -q rpcidmapd &amp;amp;&amp;amp; service rpcidmapd stop ||
&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; &#160;&#160;&#160;&#160;&#160;&#160; true
CMD: trevis-8vm4 { [[ -e /etc/SuSE-release ]] &amp;amp;&amp;amp;
&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; &#160;service nfsserver stop; } ||
&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; &#160;service nfs stop
CMD: trevis-8vm4 sed -i &apos;/^lustre/d&apos; /etc/exports
CMD: trevis-8vm4 exportfs -v
CMD: trevis-8vm4 grep -c /mnt/lustre&apos; &apos; /proc/mounts
Stopping client trevis-8vm4 /mnt/lustre (opts:-f)
CMD: trevis-8vm4 lsof -t /mnt/lustre
CMD: trevis-8vm4 umount -f /mnt/lustre 2&amp;gt;&amp;amp;1&#160;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Looking at the console logs for vm4, MDS1 and 3, we see&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 2216.385890] Lustre: DEBUG MARKER: == parallel-scale-nfsv4 test complete, duration 2088 sec ============================================= 22:07:24 (1521868044)
[ 2216.698201] Lustre: DEBUG MARKER: { [[ -e /etc/SuSE-release ]] &amp;amp;&amp;amp;
[ 2216.698201] &#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; &#160;service nfsserver stop; } ||
[ 2216.698201] &#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; &#160;service nfs stop
[ 2216.805093] nfsd: last server has exited, flushing export cache
[ 2216.819487] Lustre: DEBUG MARKER: sed -i &apos;/^lustre/d&apos; /etc/exports
[ 2216.885266] Lustre: DEBUG MARKER: exportfs -v
[ 2216.945098] Lustre: DEBUG MARKER: grep -c /mnt/lustre&apos; &apos; /proc/mounts
[ 2216.982526] Lustre: DEBUG MARKER: lsof -t /mnt/lustre
[ 2217.170422] Lustre: DEBUG MARKER: umount -f /mnt/lustre 2&amp;gt;&amp;amp;1
[ 2217.192827] Lustre: setting import lustre-MDT0000_UUID INACTIVE by administrator request
[ 2217.193476] LustreError: 410:0:(file.c:205:ll_close_inode_openhandle()) lustre-clilmv-ffff880060b4e800: inode [0x200000406:0x3c1b:0x0] mdc close failed: rc = -108
[ 2217.218709] Lustre: 4066:0:(llite_lib.c:2676:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.84@tcp:/lustre/fid: [0x200000406:0x3e42:0x0]/ may get corrupted (rc -108)
[ 2217.218732] Lustre: 4066:0:(llite_lib.c:2676:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.84@tcp:/lustre/fid: [0x200000406:0x3e7b:0x0]/ may get corrupted (rc -108)
&#8230;
[ 5541.474664]
[ 5541.474667] umount&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; D 0000000000000000&#160;&#160;&#160;&#160; 0&#160;&#160; 410&#160;&#160;&#160; 409 0x00000000
[ 5541.474669]&#160; ffff88004365fda8 ffff88004365fde0 ffff880048e5ce00 ffff880043660000
[ 5541.474670]&#160; ffff88004365fde0 000000010013feb9 ffff88007fc10840 0000000000000000
[ 5541.474671]&#160; ffff88004365fdc0 ffffffff81612a95 ffff88007fc10840 ffff88004365fe68
[ 5541.474672] Call Trace:
[ 5541.474674]&#160; [&amp;lt;ffffffff81612a95&amp;gt;] schedule+0x35/0x80
[ 5541.474677]&#160; [&amp;lt;ffffffff81615851&amp;gt;] schedule_timeout+0x161/0x2d0
[ 5541.474689]&#160; [&amp;lt;ffffffffa1457cc7&amp;gt;] ll_kill_super+0x77/0x150 [lustre]
[ 5541.474723]&#160; [&amp;lt;ffffffffa09f3a94&amp;gt;] lustre_kill_super+0x34/0x40 [obdclass]
[ 5541.474734]&#160; [&amp;lt;ffffffff8120cf5f&amp;gt;] deactivate_locked_super+0x3f/0x70
[ 5541.474742]&#160; [&amp;lt;ffffffff812283fb&amp;gt;] cleanup_mnt+0x3b/0x80
[ 5541.474745]&#160; [&amp;lt;ffffffff8109d198&amp;gt;] task_work_run+0x78/0x90
[ 5541.474748]&#160; [&amp;lt;ffffffff8107b5cf&amp;gt;] exit_to_usermode_loop+0x91/0xc2
[ 5541.474760]&#160; [&amp;lt;ffffffff81003ae5&amp;gt;] syscall_return_slowpath+0x85/0xa0
[ 5541.474768]&#160; [&amp;lt;ffffffff81616ca7&amp;gt;] int_ret_from_sys_call+0x25/0x9f
[ 5541.476903] DWARF2 unwinder stuck at int_ret_from_sys_call+0x25/0x9f
[ 5541.476904]&#160;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;We see this problem with unmount on the master and b2_10 branches for SLES12 SP2 and SP3 testing only.&lt;/p&gt;

&lt;p&gt;Logs for the test suite failures are at&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sets/4bce5a66-2f2f-11e8-9e0e-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/4bce5a66-2f2f-11e8-9e0e-52540065bddc&lt;/a&gt;&#160;&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sets/103f280e-2fac-11e8-b3c6-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/103f280e-2fac-11e8-b3c6-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sets/044a75f0-2eba-11e8-b6a0-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/044a75f0-2eba-11e8-b6a0-52540065bddc&lt;/a&gt;&lt;/p&gt;</description>
                <environment></environment>
        <key id="51511">LU-10851</key>
            <summary>parallel-scale-nfsv4 hangs on unmount</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="qian_wc">Qian Yingjin</assignee>
                                    <reporter username="jamesanunez">James Nunez</reporter>
                        <labels>
                    </labels>
                <created>Mon, 26 Mar 2018 17:04:39 +0000</created>
                <updated>Tue, 3 Oct 2023 00:48:04 +0000</updated>
                                            <version>Lustre 2.11.0</version>
                    <version>Lustre 2.12.0</version>
                    <version>Lustre 2.10.3</version>
                    <version>Lustre 2.10.4</version>
                    <version>Lustre 2.10.5</version>
                    <version>Lustre 2.13.0</version>
                    <version>Lustre 2.10.6</version>
                    <version>Lustre 2.10.7</version>
                    <version>Lustre 2.12.1</version>
                    <version>Lustre 2.12.3</version>
                    <version>Lustre 2.14.0</version>
                    <version>Lustre 2.12.5</version>
                    <version>Lustre 2.12.6</version>
                    <version>Lustre 2.15.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>10</watches>
                                                                            <comments>
                            <comment id="224553" author="mdiep" created="Mon, 26 Mar 2018 17:19:49 +0000"  >&lt;p&gt;let&apos;s wait after &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10566&quot; title=&quot;parallel-scale-nfsv4 test_metabench: mkdir: cannot create directory on Read-only file system&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10566&quot;&gt;LU-10566&lt;/a&gt; fix.&lt;/p&gt;</comment>
                            <comment id="227472" author="standan" created="Tue, 8 May 2018 00:48:48 +0000"  >&lt;p&gt;+1 for 2.10.3_132&#160;&lt;a href=&quot;https://testing.whamcloud.com/test_sets/ea75ed40-5091-11e8-abc3-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/ea75ed40-5091-11e8-abc3-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="232018" author="jamesanunez" created="Wed, 15 Aug 2018 23:35:17 +0000"  >&lt;p&gt;We have a similar hang on umount at &lt;a href=&quot;https://testing.whamcloud.com/test_sets/15040422-a0d1-11e8-87f3-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/15040422-a0d1-11e8-87f3-52540065bddc&lt;/a&gt; for SLES12 SP3. &lt;/p&gt;</comment>
                            <comment id="249490" author="jamesanunez" created="Wed, 19 Jun 2019 17:21:58 +0000"  >&lt;p&gt;I&apos;m see non-SLES parallel-scale-nfsv4 test suites hang on umount. On the MDS, I don&apos;t see mount in the &apos;D&apos; state, but I do see the same error messages on the MDS. For example, &lt;a href=&quot;https://testing.whamcloud.com/test_sets/9acd3074-9234-11e9-bebb-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/9acd3074-9234-11e9-bebb-52540065bddc&lt;/a&gt; has the following in the MDS console&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[43887.752148] Lustre: DEBUG MARKER: == parallel-scale-nfsv4 test complete, duration 2913 sec ============================================= 00:44:22 (1560905062)
[43898.780782] Lustre: DEBUG MARKER: { [[ -e /etc/SuSE-release ]] &amp;amp;&amp;amp;
[43898.780782] 				 service nfsserver stop; } ||
[43898.780782] 				 service nfs stop
[43899.020702] nfsd: last server has exited, flushing export cache
[43899.183891] Lustre: DEBUG MARKER: sed -i &apos;/lustre/d&apos; /etc/exports
[43899.529625] Lustre: DEBUG MARKER: exportfs -v
[43899.871591] Lustre: DEBUG MARKER: grep -c /mnt/lustre&apos; &apos; /proc/mounts
[43900.207184] Lustre: DEBUG MARKER: lsof -t /mnt/lustre
[43900.668082] Lustre: DEBUG MARKER: umount -f /mnt/lustre 2&amp;gt;&amp;amp;1
[43900.832871] Lustre: setting import lustre-MDT0000_UUID INACTIVE by administrator request
[43900.833789] Lustre: Skipped 7 previous similar messages
[43900.835183] LustreError: 12449:0:(file.c:233:ll_close_inode_openhandle()) lustre-clilmv-ffff8ce81e82e800: inode [0x2000255c6:0x37fc:0x0] mdc close failed: rc = -108
[43900.836647] LustreError: 12449:0:(file.c:233:ll_close_inode_openhandle()) Skipped 38 previous similar messages
[43900.847426] Lustre: 7507:0:(llite_lib.c:2842:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.144@tcp:/lustre/fid: [0x2000255c6:0x3730:0x0]/ may get corrupted (rc -108)
[43900.847442] Lustre: 7506:0:(llite_lib.c:2842:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.144@tcp:/lustre/fid: [0x2000255c6:0x37ee:0x0]/ may get corrupted (rc -108)
[43900.851357] Lustre: 7506:0:(llite_lib.c:2842:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.144@tcp:/lustre/fid: [0x2000255c6:0x3820:0x0]/ may get corrupted (rc -108)
[43900.853652] Lustre: 7507:0:(llite_lib.c:2842:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.144@tcp:/lustre/fid: [0x2000255c6:0x3811:0x0]/ may get corrupted (rc -108)
[43900.853743] Lustre: 7506:0:(llite_lib.c:2842:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.144@tcp:/lustre/fid: [0x2000255c6:0x378a:0x0]/ may get corrupted (rc -108)
[43900.858826] Lustre: 7507:0:(llite_lib.c:2842:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.144@tcp:/lustre/fid: [0x2000255c6:0x3824:0x0]/ may get corrupted (rc -108)
[43900.858892] Lustre: 7506:0:(llite_lib.c:2842:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.144@tcp:/lustre/fid: [0x2000255c6:0x37fa:0x0]/ may get corrupted (rc -108)
[43900.868467] Lustre: 7506:0:(llite_lib.c:2842:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.144@tcp:/lustre/fid: [0x2000255c6:0x381e:0x0]/ may get corrupted (rc -108)
[43900.872369] Lustre: 7507:0:(llite_lib.c:2842:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.9.4.144@tcp:/lustre/fid: [0x2000255c6:0x381f:0x0]/ may get corrupted (rc -108)
[43910.921368] Lustre: Unmounted lustre-client
[43951.682868] Lustre: lustre-MDT0000: haven&apos;t heard from client e8ec627f-7451-4 (at 10.9.4.144@tcp) in 52 seconds. I think it&apos;s dead, and I am evicting it. exp ffff8ce7d95f0800, cur 1560905126 expire 1560905096 last 1560905074
[47228.231671] SysRq : Changing Loglevel
[47228.232247] Loglevel set to 8
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="253775" author="yujian" created="Wed, 28 Aug 2019 17:49:58 +0000"  >&lt;p&gt;+1 on Lustre b2_12 branch: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/1e8a4714-c997-11e9-a25b-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/1e8a4714-c997-11e9-a25b-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="269793" author="yujian" created="Sun, 10 May 2020 19:27:19 +0000"  >&lt;p&gt;+1 on master branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/9b783863-4852-4ef4-a19a-b945f8166aef&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/9b783863-4852-4ef4-a19a-b945f8166aef&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="281143" author="jamesanunez" created="Wed, 30 Sep 2020 22:10:02 +0000"  >&lt;p&gt;We see this hang on parallel-scale-nfsv3 also; &lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/0a96e362-e334-489a-9f54-0095fdfa20dc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/0a96e362-e334-489a-9f54-0095fdfa20dc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/c964e928-08e7-4c4a-ba13-99558a52174d&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/c964e928-08e7-4c4a-ba13-99558a52174d&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="327024" author="sarah" created="Tue, 22 Feb 2022 21:50:51 +0000"  >&lt;p&gt;similar error on master, it failed to stop nfs service after all tests finished.&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/e370a684-05f9-41cc-9e78-0b061119344f&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/e370a684-05f9-41cc-9e78-0b061119344f&lt;/a&gt;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;onyx-71vm13: Redirecting to /bin/systemctl stop nfs.service
onyx-71vm13: Failed to stop nfs.service: Unit nfs.service not loaded.
onyx-71vm13: Redirecting to /bin/systemctl stop nfs-server.service
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="329102" author="artem_blagodarenko" created="Mon, 14 Mar 2022 07:55:42 +0000"  >&lt;p&gt;+1 on master&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/bc47e7ed-ce27-4fab-ad48-8ab438dc70b8&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/bc47e7ed-ce27-4fab-ad48-8ab438dc70b8&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="356580" author="deiter" created="Thu, 15 Dec 2022 18:38:23 +0000"  >&lt;p&gt;We hit the same issue on the master branch:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/1442a73f-2793-4e28-9124-a81b4ae65262&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/1442a73f-2793-4e28-9124-a81b4ae65262&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/f0561029-d1d1-4d61-9eea-7f2130ad9784&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/f0561029-d1d1-4d61-9eea-7f2130ad9784&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;all test sessions hang on:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
CMD: trevis-70vm4 umount -f /mnt/lustre 2&amp;gt;&amp;amp;1

[ 5154.892582] task:umount          state:D stack:    0 pid:37125 ppid: 37124 flags:0x00004080
[ 5154.894509] Call Trace:
[ 5154.895153]  __schedule+0x2bd/0x760
[ 5154.896010]  schedule+0x37/0xa0
[ 5154.896681]  schedule_timeout+0x197/0x300
[ 5154.897504]  ? __next_timer_interrupt+0xf0/0xf0
[ 5154.898422]  ? __radix_tree_delete+0x92/0xa0
[ 5154.899286]  ll_kill_super+0x63/0x130 [lustre]
[ 5154.900268]  lustre_kill_super+0x28/0x40 [lustre]
[ 5154.901221]  deactivate_locked_super+0x34/0x70
[ 5154.902133]  cleanup_mnt+0x3b/0x70
[ 5154.902869]  task_work_run+0x8a/0xb0
[ 5154.903640]  exit_to_usermode_loop+0xeb/0xf0
[ 5154.904525]  do_syscall_64+0x198/0x1a0
[ 5154.905288]  entry_SYSCALL_64_after_hwframe+0x65/0xca
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;dmesg:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[ 1796.367975] Lustre: setting &lt;span class=&quot;code-keyword&quot;&gt;import&lt;/span&gt; lustre-MDT0000_UUID INACTIVE by administrator request
[ 1796.370695] LustreError: 37125:0:(file.c:242:ll_close_inode_openhandle()) lustre-clilmv-ffff95a92ffe3800: inode [0x200000403:0x153d:0x0] mdc close failed: rc = -108
[ 1796.411489] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x28cf:0x0]/ may get corrupted (rc -108)
[ 1796.411662] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x29ec:0x0]/ may get corrupted (rc -108)
[ 1796.414759] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x293d:0x0]/ may get corrupted (rc -108)
[ 1796.421120] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2a1b:0x0]/ may get corrupted (rc -108)
[ 1796.436808] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x29ed:0x0]/ may get corrupted (rc -108)
[ 1796.442761] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2a05:0x0]/ may get corrupted (rc -108)
[ 1796.443103] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x28f9:0x0]/ may get corrupted (rc -108)
[ 1796.445919] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2940:0x0]/ may get corrupted (rc -108)
[ 1796.449051] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x28f0:0x0]/ may get corrupted (rc -108)
[ 1796.452119] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x29b1:0x0]/ may get corrupted (rc -108)
[ 1796.458583] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x298d:0x0]/ may get corrupted (rc -108)
[ 1796.463828] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x29bc:0x0]/ may get corrupted (rc -108)
[ 1796.466124] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2955:0x0]/ may get corrupted (rc -108)
[ 1796.466961] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x298e:0x0]/ may get corrupted (rc -108)
[ 1796.472725] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2a0f:0x0]/ may get corrupted (rc -108)
[ 1796.473142] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x294a:0x0]/ may get corrupted (rc -108)
[ 1796.480757] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x29e1:0x0]/ may get corrupted (rc -108)
[ 1796.481037] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x28f3:0x0]/ may get corrupted (rc -108)
[ 1796.483887] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2a10:0x0]/ may get corrupted (rc -108)
[ 1796.494909] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2901:0x0]/ may get corrupted (rc -108)
[ 1796.495139] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2945:0x0]/ may get corrupted (rc -108)
[ 1796.498049] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x28d7:0x0]/ may get corrupted (rc -108)
[ 1796.501156] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2a19:0x0]/ may get corrupted (rc -108)
[ 1796.504452] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x29d2:0x0]/ may get corrupted (rc -108)
[ 1796.519318] Lustre: 5617:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x296b:0x0]/ may get corrupted (rc -108)
[ 1796.525781] Lustre: 5618:0:(llite_lib.c:3674:ll_dirty_page_discard_warn()) lustre: dirty page discard: 10.240.41.235@tcp:/lustre/fid: [0x200000403:0x2a02:0x0]/ may get corrupted (rc -108)
[ 1826.914042] Lustre: lustre-MDT0000: haven&lt;span class=&quot;code-quote&quot;&gt;&apos;t heard from client 309ad38c-8e48-430d-ae7a-a65d0196c74a (at 0@lo) in 31 seconds. I think it&apos;&lt;/span&gt;s dead, and I am evicting it. exp 00000000f489d0d3, cur 1671076587 expire 1671076557 last 1671076556
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="356583" author="JIRAUSER17312" created="Thu, 15 Dec 2022 19:24:24 +0000"  >&lt;p&gt;Is this dmesg from the MDS or a client?&lt;/p&gt;</comment>
                            <comment id="356587" author="deiter" created="Thu, 15 Dec 2022 19:52:26 +0000"  >&lt;p&gt;Hello &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=cfaber&quot; class=&quot;user-hover&quot; rel=&quot;cfaber&quot;&gt;cfaber&lt;/a&gt;,&lt;/p&gt;

&lt;p&gt;&lt;cite&gt;Is this dmesg from the MDS or a client&lt;/cite&gt;?&lt;/p&gt;

&lt;p&gt;For some reason the Lustre test suite uses the MDS host as a Lustre client. The test flow is:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Setup Lustre targets: host4 - MGS/MDS, host3 - OSS:
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
host4# mount -t lustre
/dev/mapper/mds1_flakey on /mnt/lustre-mds1 type lustre (rw,svname=lustre-MDT0000,mgs,osd=osd-ldiskfs,user_xattr,errors=remount-ro)

host3# mount -t lustre
/dev/mapper/ost1_flakey on /mnt/lustre-ost1 type lustre (rw,svname=lustre-OST0000,mgsnode=10.240.43.3@tcp,osd=osd-ldiskfs,errors=remount-ro)
/dev/mapper/ost2_flakey on /mnt/lustre-ost2 type lustre (rw,svname=lustre-OST0001,mgsnode=10.240.43.3@tcp,osd=osd-ldiskfs,errors=remount-ro)
/dev/mapper/ost3_flakey on /mnt/lustre-ost3 type lustre (rw,svname=lustre-OST0002,mgsnode=10.240.43.3@tcp,osd=osd-ldiskfs,errors=remount-ro)
/dev/mapper/ost4_flakey on /mnt/lustre-ost4 type lustre (rw,svname=lustre-OST0003,mgsnode=10.240.43.3@tcp,osd=osd-ldiskfs,errors=remount-ro)
/dev/mapper/ost5_flakey on /mnt/lustre-ost5 type lustre (rw,svname=lustre-OST0004,mgsnode=10.240.43.3@tcp,osd=osd-ldiskfs,errors=remount-ro)
/dev/mapper/ost6_flakey on /mnt/lustre-ost6 type lustre (rw,svname=lustre-OST0005,mgsnode=10.240.43.3@tcp,osd=osd-ldiskfs,errors=remount-ro)
/dev/mapper/ost7_flakey on /mnt/lustre-ost7 type lustre (rw,svname=lustre-OST0006,mgsnode=10.240.43.3@tcp,osd=osd-ldiskfs,errors=remount-ro)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;ul&gt;
	&lt;li&gt;Mount the Lustre filesystem on host4 (i.e. as a Lustre client):
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
host4# mount -t lustre
10.240.43.3@tcp:/lustre on /mnt/lustre type lustre (rw,checksum,flock,user_xattr,lruresize,lazystatfs,32bitapi,nouser_fid2path,verbose,encrypt)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Note - 10.240.43.3 is a local address on the same host.&lt;/p&gt;

&lt;ul&gt;
	&lt;li&gt;Export  &lt;tt&gt;/mnt/lustre&lt;/tt&gt; via NFS
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
host4# showmount -e
Export list &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; trevis-84vm4.trevis.whamcloud.com:
/mnt/lustre *
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;ul&gt;
	&lt;li&gt;Mount &lt;tt&gt;/mnt/lustre&lt;/tt&gt; on the host1 and host2 NFS clients&lt;/li&gt;
	&lt;li&gt;Start workload(s) on the host1 and host2 NFS clients on top of the mounted NFS share&lt;/li&gt;
	&lt;li&gt;Umount the NFS share &lt;tt&gt;/mnt/lustre&lt;/tt&gt; on host1 and host2&lt;/li&gt;
	&lt;li&gt;Stop the NFS server on host4&lt;/li&gt;
	&lt;li&gt;Try to umount the Lustre client mountpoint &lt;tt&gt;/mnt/lustre&lt;/tt&gt; on host4:
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
host4# umount -f /mnt/lustre
==&amp;gt; hang here
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="356591" author="adilger" created="Thu, 15 Dec 2022 20:21:26 +0000"  >&lt;p&gt;Colin, the &lt;tt&gt;llite&amp;#42;&lt;/tt&gt; messages would definitely be from the client, regardless of where it is mounted.&lt;/p&gt;

&lt;p&gt;Alex, the reason for the NFS server to be mounted on the MDS is twofold:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;fewer test nodes needed&lt;/li&gt;
	&lt;li&gt;faster performance because the client is local to the MDS, where many of the RPCs are sent, giving lower-latency metadata operations&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;I don&apos;t &lt;em&gt;think&lt;/em&gt; that would be a contributor to the unmount problem being seen, but it&apos;s hard to say for sure.&lt;/p&gt;</comment>
                            <comment id="356595" author="adilger" created="Thu, 15 Dec 2022 20:34:40 +0000"  >&lt;p&gt;Looking at the messages here, it seems the client still has cached files at the time that it is being unmounted, but the connection to the MDS is stopped shortly thereafter and that gives the client problems.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 2217.170422] Lustre: DEBUG MARKER: umount -f /mnt/lustre 2&amp;gt;&amp;amp;1
[ 2217.192827] Lustre: setting import lustre-MDT0000_UUID INACTIVE by administrator request
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The &quot;&lt;tt&gt;-f&lt;/tt&gt;&quot; flag means &quot;force unmount and don&apos;t wait gracefully&quot; and &lt;em&gt;should&lt;/em&gt; be able to clean up regardless of whether the MDS connection is available or not, but I wonder if it is part of the issue here?  If this was just &quot;&lt;tt&gt;umount /mnt/lustre&lt;/tt&gt;&quot; the client would flush the dirty cache (avoiding the &quot;dirty page discard&quot; errors) and &lt;em&gt;probably&lt;/em&gt; work more reliably.&lt;/p&gt;

&lt;p&gt;That said, &quot;&lt;tt&gt;umount -f&lt;/tt&gt;&quot; should &lt;b&gt;also&lt;/b&gt; work reliably, but that likely needs some changes to the code on the client.  According to the stack traces, the unmount is stuck on:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
void ll_kill_super(struct super_block *sb)
{
                &lt;span class=&quot;code-comment&quot;&gt;/* wait running statahead threads to quit */&lt;/span&gt;
                &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; (atomic_read(&amp;amp;sbi-&amp;gt;ll_sa_running) &amp;gt; 0)
                        schedule_timeout_uninterruptible(
                                cfs_time_seconds(1) &amp;gt;&amp;gt; 3);
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;so &lt;em&gt;something&lt;/em&gt; is preventing the statahead threads from exiting.&lt;/p&gt;

&lt;p&gt;Likely the statahead threads need to be woken up here during the unmount process so that they can exit.  It isn&apos;t even clear whether there &lt;b&gt;is&lt;/b&gt; a list of statahead threads on the superblock that can be signaled to wake up and exit; it looks like there is only a count/limit of statahead threads.  I recall from Yingjin&apos;s recent statahead patches that these threads will exit on their own after a short time, but I&apos;m not sure whether that is enough to fix this problem.  Definitely having a direct notification, with the threads checking that the filesystem is being unmounted, is better than a timeout.&lt;/p&gt;
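
&lt;p&gt;A minimal sketch of that idea (not a patch; &lt;tt&gt;ll_umounting&lt;/tt&gt; and &lt;tt&gt;ll_sa_waitq&lt;/tt&gt; are made-up names for whatever flag/waitqueue the real change would add to &lt;tt&gt;struct ll_sb_info&lt;/tt&gt;):&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
/* sketch only: replace the polling loop with a wakeup-driven wait */
void ll_kill_super(struct super_block *sb)
{
        struct ll_sb_info *sbi = ll_s2sbi(sb);

        /* tell the statahead threads the mount is going away
         * (ll_umounting and ll_sa_waitq are hypothetical fields) */
        sbi-&amp;gt;ll_umounting = 1;
        wake_up_all(&amp;amp;sbi-&amp;gt;ll_sa_waitq);

        /* sleep until the last statahead thread has exited,
         * instead of rechecking every 1/8 of a second */
        wait_event(sbi-&amp;gt;ll_sa_waitq,
                   atomic_read(&amp;amp;sbi-&amp;gt;ll_sa_running) == 0);
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>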
                            <comment id="356598" author="adilger" created="Thu, 15 Dec 2022 20:44:51 +0000"  >&lt;p&gt;Alex, can you please test if removing the &quot;&lt;tt&gt;-f&lt;/tt&gt;&quot; allows this testing to pass more reliably?&lt;/p&gt;

&lt;p&gt;Yingjin, I think you are most familiar with statahead these days.  I&apos;m thinking that the patch to fix this may be quite different before/after your statahead patches.&lt;/p&gt;

&lt;p&gt;For master I would prefer that we prioritize landing the statahead patch series, so if this isn&apos;t already fixed by your series, add the fix as a patch as early in your series as possible to avoid conflicts with later patches.  Then a separate patch for backporting to earlier branches could keep the statahead threads on a list/waitqueue (?) on the superblock so they can be woken up; the threads would then check whether the filesystem is being unmounted and exit immediately.&lt;/p&gt;
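
&lt;p&gt;The statahead thread side could then look roughly like the following (again only a sketch with the same made-up &lt;tt&gt;ll_umounting&lt;/tt&gt;/&lt;tt&gt;ll_sa_waitq&lt;/tt&gt; fields, not the real &lt;tt&gt;ll_statahead_thread()&lt;/tt&gt;): wait with a wakeup source instead of a bare timeout, bail out as soon as the unmount flag is seen, and have the last thread out wake the unmounting task:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
/* sketch of the statahead thread main loop with an unmount check */
static int ll_statahead_thread_sketch(void *arg)
{
        struct ll_sb_info *sbi = arg;

        while (!sbi-&amp;gt;ll_umounting /* and there are entries left to prefetch */) {
                /* ... issue and collect statahead RPCs ... */

                /* sleep, but wake immediately if unmount sets the flag */
                wait_event_timeout(sbi-&amp;gt;ll_sa_waitq,
                                   sbi-&amp;gt;ll_umounting,
                                   cfs_time_seconds(1) &amp;gt;&amp;gt; 3);
        }

        /* the last thread out wakes up ll_kill_super() */
        if (atomic_dec_and_test(&amp;amp;sbi-&amp;gt;ll_sa_running))
                wake_up_all(&amp;amp;sbi-&amp;gt;ll_sa_waitq);
        return 0;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>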
                            <comment id="356600" author="deiter" created="Thu, 15 Dec 2022 21:02:14 +0000"  >&lt;p&gt;Hello &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=adilger&quot; class=&quot;user-hover&quot; rel=&quot;adilger&quot;&gt;adilger&lt;/a&gt;,&lt;/p&gt;

&lt;p&gt;Thank you very much for the detailed explanation!&lt;br/&gt;
Let me test &lt;tt&gt;umount&lt;/tt&gt; without the &lt;tt&gt;force&lt;/tt&gt; flag.&lt;/p&gt;

&lt;p&gt;Thank you!&lt;/p&gt;</comment>
                            <comment id="356653" author="deiter" created="Fri, 16 Dec 2022 09:21:43 +0000"  >&lt;p&gt;Hello &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=adilger&quot; class=&quot;user-hover&quot; rel=&quot;adilger&quot;&gt;adilger&lt;/a&gt;,&lt;/p&gt;

&lt;p&gt;&lt;cite&gt;Alex, can you please test if removing the &quot;-f&quot; allows this testing to pass more reliably&lt;/cite&gt;?&lt;/p&gt;

&lt;p&gt;Done, but result is the same: &lt;a href=&quot;https://testing.whamcloud.com/test_sessions/related?job=lustre-reviews&amp;amp;build=91221#redirect&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/related?job=lustre-reviews&amp;amp;build=91221#redirect&lt;/a&gt;&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[ 1737.450139] Lustre: DEBUG MARKER: == parallel-scale-nfsv3 test complete, duration 1480 sec ========================================================== 23:08:24 (1671145704)
[ 1738.826052] Lustre: DEBUG MARKER: systemctl stop nfs-server
[ 1739.068162] nfsd: last server has exited, flushing export cache
[ 1739.311751] Lustre: DEBUG MARKER: sed -i &lt;span class=&quot;code-quote&quot;&gt;&apos;\|^/mnt/lustre|d&apos;&lt;/span&gt; /etc/exports
[ 1739.702372] Lustre: DEBUG MARKER: exportfs -v
[ 1740.089608] Lustre: DEBUG MARKER: grep -c /mnt/lustre&lt;span class=&quot;code-quote&quot;&gt;&apos; &apos;&lt;/span&gt; /proc/mounts
[ 1740.448974] Lustre: DEBUG MARKER: lsof -t /mnt/lustre
[ 1740.993566] Lustre: DEBUG MARKER: umount  /mnt/lustre 2&amp;gt;&amp;amp;1
[ 5093.425686] sysrq: SysRq : Changing Loglevel
...
[ 5096.722623] task:umount          state:D stack:    0 pid:56280 ppid: 56279 flags:0x00004080
[ 5096.724108] Call Trace:
[ 5096.724610]  __schedule+0x2bd/0x760
[ 5096.725284]  schedule+0x37/0xa0
[ 5096.725903]  schedule_timeout+0x197/0x300
[ 5096.726669]  ? __next_timer_interrupt+0xf0/0xf0
[ 5096.727535]  ? __radix_tree_delete+0x92/0xa0
[ 5096.728360]  ll_kill_super+0x63/0x130 [lustre]
[ 5096.729239]  lustre_kill_super+0x28/0x40 [lustre]
[ 5096.730133]  deactivate_locked_super+0x34/0x70
[ 5096.730970]  cleanup_mnt+0x3b/0x70
[ 5096.731645]  task_work_run+0x8a/0xb0
[ 5096.732344]  exit_to_usermode_loop+0xeb/0xf0
[ 5096.733163]  do_syscall_64+0x198/0x1a0
[ 5096.733888]  entry_SYSCALL_64_after_hwframe+0x65/0xca
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Details: umount hangs only for NFSv3 and may be caused by tests using locks:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[ 1231.270056] Lustre: DEBUG MARKER: /usr/sbin/lctl mark == parallel-scale-nfsv3 test connectathon: connectathon == 22:58:47 \(1671145127\)
[ 1231.633442] Lustre: DEBUG MARKER: == parallel-scale-nfsv3 test connectathon: connectathon == 22:58:47 (1671145127)
[ 1232.072645] Lustre: DEBUG MARKER: /usr/sbin/lctl mark sh .\/runtests -N 2 -b -f \/mnt\/lustre\/d0.parallel-scale-nfs\/d0.connectathon
[ 1232.437529] Lustre: DEBUG MARKER: sh ./runtests -N 2 -b -f /mnt/lustre/d0.parallel-scale-nfs/d0.connectathon
[ 1234.227996] Lustre: DEBUG MARKER: /usr/sbin/lctl mark sh .\/runtests -N 2 -g -f \/mnt\/lustre\/d0.parallel-scale-nfs\/d0.connectathon
[ 1234.581110] Lustre: DEBUG MARKER: sh ./runtests -N 2 -g -f /mnt/lustre/d0.parallel-scale-nfs/d0.connectathon
[ 1241.533359] Lustre: DEBUG MARKER: /usr/sbin/lctl mark sh .\/runtests -N 2 -s -f \/mnt\/lustre\/d0.parallel-scale-nfs\/d0.connectathon
[ 1241.914629] Lustre: DEBUG MARKER: sh ./runtests -N 2 -s -f /mnt/lustre/d0.parallel-scale-nfs/d0.connectathon
[ 1250.386938] Lustre: DEBUG MARKER: /usr/sbin/lctl mark sh .\/runtests -N 2 -l -f \/mnt\/lustre\/d0.parallel-scale-nfs\/d0.connectathon
[ 1250.784748] Lustre: DEBUG MARKER: sh ./runtests -N 2 -l -f /mnt/lustre/d0.parallel-scale-nfs/d0.connectathon
[ 1285.778542] LustreError: 18862:0:(file.c:4836:ll_file_flock()) unknown fcntl lock command: 1029
[ 1325.200970] LustreError: 18862:0:(file.c:4836:ll_file_flock()) unknown fcntl lock command: 1029
[ 1355.406386] LustreError: 18862:0:(file.c:4836:ll_file_flock()) unknown fcntl lock command: 1029
[ 1395.339987] LustreError: 18862:0:(file.c:4836:ll_file_flock()) unknown fcntl lock command: 1029
[ 1434.762512] LustreError: 18862:0:(file.c:4836:ll_file_flock()) unknown fcntl lock command: 1029
[ 1464.967844] LustreError: 18862:0:(file.c:4836:ll_file_flock()) unknown fcntl lock command: 1029
[ 1473.104321] Lustre: DEBUG MARKER: /usr/sbin/lctl mark == parallel-scale-nfsv3 test iorssf: iorssf ============== 23:02:49 \(1671145369\)
[ 1473.474325] Lustre: DEBUG MARKER: == parallel-scale-nfsv3 test iorssf: iorssf ============== 23:02:49 (1671145369)
[ 1473.951047] Lustre: DEBUG MARKER: /usr/sbin/lctl mark  parallel-scale-nfsv3 test_iorssf: @@@@@@ FAIL: ior failed! 1 
[ 1474.334915] Lustre: DEBUG MARKER: parallel-scale-nfsv3 test_iorssf: @@@@@@ FAIL: ior failed! 1
[ 1474.729606] Lustre: DEBUG MARKER: /usr/sbin/lctl dk &amp;gt; /autotest/autotest-2/2022-12-15/lustre-reviews_custom_91221_102_231e7dd2-7190-4e2c-8d24-93aa05c646b7&lt;span class=&quot;code-comment&quot;&gt;//parallel-scale-nfsv3.test_iorssf.debug_log.$(hostname -s).1671145371.log;
&lt;/span&gt;		dmesg &amp;gt; /autotest/autotest-2/2022-12-15/lustre-reviews_cu
[ 1476.359538] Lustre: DEBUG MARKER: /usr/sbin/lctl mark == parallel-scale-nfsv3 test iorfpp: iorfpp ============== 23:02:52 \(1671145372\)
[ 1476.726679] Lustre: DEBUG MARKER: == parallel-scale-nfsv3 test iorfpp: iorfpp ============== 23:02:52 (1671145372)
[ 1477.201082] Lustre: DEBUG MARKER: /usr/sbin/lctl mark  parallel-scale-nfsv3 test_iorfpp: @@@@@@ FAIL: ior failed! 1 
[ 1477.578628] Lustre: DEBUG MARKER: parallel-scale-nfsv3 test_iorfpp: @@@@@@ FAIL: ior failed! 1
[ 1477.984261] Lustre: DEBUG MARKER: /usr/sbin/lctl dk &amp;gt; /autotest/autotest-2/2022-12-15/lustre-reviews_custom_91221_102_231e7dd2-7190-4e2c-8d24-93aa05c646b7&lt;span class=&quot;code-comment&quot;&gt;//parallel-scale-nfsv3.test_iorfpp.debug_log.$(hostname -s).1671145374.log;
&lt;/span&gt;		dmesg &amp;gt; /autotest/autotest-2/2022-12-15/lustre-reviews_cu
[ 1479.340152] Lustre: DEBUG MARKER: /usr/sbin/lctl mark == parallel-scale-nfsv3 test racer_on_nfs: racer on NFS client ========================================================== 23:02:55 \(1671145375\)
[ 1479.702251] Lustre: DEBUG MARKER: == parallel-scale-nfsv3 test racer_on_nfs: racer on NFS client ========================================================== 23:02:55 (1671145375)
[ 1787.824195] Lustre: DEBUG MARKER: /usr/sbin/lctl mark == parallel-scale-nfsv3 test complete, duration 1466 sec ========================================================== 23:08:04 \(1671145684\)
[ 1788.199820] Lustre: DEBUG MARKER: == parallel-scale-nfsv3 test complete, duration 1466 sec ========================================================== 23:08:04 (1671145684)
[ 1789.526631] Lustre: DEBUG MARKER: systemctl stop nfs-server
[ 1789.944393] nfsd: last server has exited, flushing export cache
[ 1790.227067] Lustre: DEBUG MARKER: sed -i &lt;span class=&quot;code-quote&quot;&gt;&apos;\|^/mnt/lustre|d&apos;&lt;/span&gt; /etc/exports
[ 1790.872269] Lustre: DEBUG MARKER: exportfs -v
[ 1791.514326] Lustre: DEBUG MARKER: grep -c /mnt/lustre&lt;span class=&quot;code-quote&quot;&gt;&apos; &apos;&lt;/span&gt; /proc/mounts
[ 1792.126821] Lustre: DEBUG MARKER: lsof -t /mnt/lustre
[ 1792.941836] Lustre: DEBUG MARKER: umount  /mnt/lustre 2&amp;gt;&amp;amp;1
[ 5153.799694] sysrq: SysRq : Changing Loglevel
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We can disable NLM locks on the server side and repeat the same tests to confirm the guess. What do you think?&lt;br/&gt;
Reference: &lt;tt&gt;man nfs&lt;/tt&gt;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
      lock / nolock  Selects whether to use the NLM sideband protocol to lock  files
                      on  the  server.  If neither option is specified (or &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; lock is
                      specified), NLM locking is used &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt;  &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt;  mount  point.   When
                      using  the nolock option, applications can lock files, but such
                      locks provide exclusion only against other applications running
                      on  the  same  client.  Remote applications are not affected by
                      these locks.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Thank you!&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="78167">LU-17154</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="50398">LU-10566</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzusf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>