<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:50:35 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12209] cannot create stripe dir: Stale file handle</title>
                <link>https://jira.whamcloud.com/browse/LU-12209</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;I&apos;m facing a new issue on Oak (2.10.7 servers), tried with both 2.10 and 2.12 clients:&lt;/p&gt;

&lt;p&gt;As root:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cd /oak/stanford/groups/
# lfs mkdir -i 1 caiwei
lfs mkdir: dirstripe error on &apos;caiwei&apos;: Stale file handle
lfs setdirstripe: cannot create stripe dir &apos;caiwei&apos;: Stale file handle

# lfs getdirstripe .
lmv_stripe_count: 0 lmv_stripe_offset: 0 lmv_hash_type: none
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;does that ring a bell? Oak is only using DNE v1 with statically striped directories. Never seen that before 2.10.7 (we recently upgraded Oak).&lt;/p&gt;

&lt;p&gt;a basic lctl dk doesn&apos;t show anything on the MDS but I may have to enable specific debug flags to see more. No other traces found so far.&lt;/p&gt;

&lt;p&gt;Tried with 2.10 and 2.12 clients, with or without idmap.&lt;/p&gt;

&lt;p&gt;Thanks!&lt;/p&gt;</description>
                <environment>CentOS 7.6, servers 2.10.7, clients 2.12 or 2.10</environment>
        <key id="55454">LU-12209</key>
            <summary>cannot create stripe dir: Stale file handle</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="sthiell">Stephane Thiell</reporter>
                        <labels>
                    </labels>
                <created>Fri, 19 Apr 2019 15:26:54 +0000</created>
                <updated>Sun, 21 Apr 2019 13:38:17 +0000</updated>
                            <resolved>Sat, 20 Apr 2019 14:04:15 +0000</resolved>
                                    <version>Lustre 2.10.7</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="246086" author="sthiell" created="Fri, 19 Apr 2019 15:31:14 +0000"  >&lt;p&gt;Note that MDT0001 is still working fine within already-created directories:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@oak-rbh01 giocomo]# pwd
/oak/stanford/groups/giocomo
[root@oak-rbh01 giocomo]# lfs getdirstripe .
lmv_stripe_count: 0 lmv_stripe_offset: 1 lmv_hash_type: none
[root@oak-rbh01 giocomo]# mkdir .testdir
[root@oak-rbh01 giocomo]# lfs mkdir -i 1 .testdir2
[root@oak-rbh01 giocomo]# lfs getdirstripe .testdir*
lmv_stripe_count: 0 lmv_stripe_offset: 1 lmv_hash_type: none
lmv_stripe_count: 0 lmv_stripe_offset: 1 lmv_hash_type: none
[root@oak-rbh01 giocomo]# rmdir .testdir*
[root@oak-rbh01 giocomo]# 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="246087" author="pfarrell" created="Fri, 19 Apr 2019 15:33:28 +0000"  >&lt;p&gt;If it is this repeatable, can you get -1 debug on the client and the server?&#160; I know that may be a pain server side, but if possible it would be great.&lt;/p&gt;</comment>
                            <comment id="246088" author="sthiell" created="Fri, 19 Apr 2019 15:52:45 +0000"  >&lt;p&gt;Hi Patrick,&lt;/p&gt;

&lt;p&gt;Ok, I will try (should be in an hour, I&apos;m on my way to the office). It looks like it is repeatable, but only when doing lfs mkdir -i 1 in a parent directory striped on MDT0. See below.&lt;/p&gt;

&lt;p&gt;Creating a directory on MDT0 in a parent dir in MDT1 does work:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@oak-rbh01 giocomo]# lfs getdirstripe .
lmv_stripe_count: 0 lmv_stripe_offset: 1 lmv_hash_type: none

[root@oak-rbh01 giocomo]# lfs mkdir -i 0 .testdir_mdt0
[root@oak-rbh01 giocomo]# lfs getdirstripe .testdir_mdt0
lmv_stripe_count: 0 lmv_stripe_offset: 0 lmv_hash_type: none
[root@oak-rbh01 giocomo]# rmdir .testdir_mdt0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;But not the other way around:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@oak-rbh01 giocomo]# cd ../ruthm
[root@oak-rbh01 ruthm]# lfs getdirstripe .
lmv_stripe_count: 0 lmv_stripe_offset: 0 lmv_hash_type: none
[root@oak-rbh01 ruthm]# lfs mkdir -i 0 .testdir_mdt0
[root@oak-rbh01 ruthm]# 
[root@oak-rbh01 ruthm]# lfs mkdir -i 1 .testdir_mdt1
error on LL_IOC_LMV_SETSTRIPE &apos;.testdir_mdt1&apos; (3): Stale file handle
error: mkdir: create stripe dir &apos;.testdir_mdt1&apos; failed
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="246092" author="sthiell" created="Fri, 19 Apr 2019 16:54:40 +0000"  >&lt;p&gt;This is done.&lt;br/&gt;
&#160;&lt;br/&gt;
Command issued on client sh-101-60 (10.9.101.60@o2ib4) running 2.12 was:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sh-101-60 ruthm]# lctl clear
[root@sh-101-60 ruthm]# lfs mkdir -i 1 .testdir_mdt1
lfs mkdir: dirstripe error on &apos;.testdir_mdt1&apos;: Stale file handle
lfs setdirstripe: cannot create stripe dir &apos;.testdir_mdt1&apos;: Stale file handle
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Nothing else was running on this client.&lt;/p&gt;

&lt;p&gt;Client logs attached as &lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/32447/32447_sh-101-60.dk.gz&quot; title=&quot;sh-101-60.dk.gz attached to LU-12209&quot;&gt;sh-101-60.dk.gz&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;MDT0 and 1 dk logs attached as &lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/32449/32449_oak-md1-s2-MDT0.dk.gz&quot; title=&quot;oak-md1-s2-MDT0.dk.gz attached to LU-12209&quot;&gt;oak-md1-s2-MDT0.dk.gz&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt; and  &lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/32448/32448_oak-md1-s1-MDT1.dk.gz&quot; title=&quot;oak-md1-s1-MDT1.dk.gz attached to LU-12209&quot;&gt;oak-md1-s1-MDT1.dk.gz&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;  &lt;/p&gt;</comment>
                            <comment id="246098" author="pfarrell" created="Fri, 19 Apr 2019 18:41:53 +0000"  >&lt;p&gt;Stephane,&lt;/p&gt;

&lt;p&gt;Thanks for the more detailed logs.&lt;/p&gt;



&lt;p&gt;Here&apos;s the source of that ESTALE:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000040:00000001:18.0:1555692294.403870:0:22597:0:(llog_osd.c:322:llog_osd_declare_write_rec()) Process entered
00000040:00000001:18.0:1555692294.403870:0:22597:0:(llog_osd.c:340:llog_osd_declare_write_rec()) Process leaving (rc=18446744073709551500 : -116 : ffffffffffffff8c)
00000040:00000001:18.0:1555692294.403871:0:22597:0:(llog.c:960:llog_declare_write_rec()) Process leaving (rc=18446744073709551500 : -116 : ffffffffffffff8c)
00000040:00000001:18.0:1555692294.403871:0:22597:0:(llog_cat.c:141:llog_cat_new_log()) Process leaving via out (rc=18446744073709551500 : -116 : 0xffffffffffffff8c) &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Looks to be out of osp_md_declare_write:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;         if (dt2osp_obj(dt)-&amp;gt;opo_stale)
                return -ESTALE;&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;But I&apos;m not sure of much more.&#160; I&apos;m going to ask Lai to take a look at this - It&apos;s in the DNE area, as you noted.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;As for what would fix this...&#160; A failover/failback of MDT1 might do the trick.&#160; It kind of looks like there&apos;s confusion over the state of an object in memory, and I think that might clear it up.&lt;/p&gt;</comment>
                            <comment id="246100" author="sthiell" created="Fri, 19 Apr 2019 18:59:05 +0000"  >&lt;p&gt;Thanks Patrick for this analysis.  I see &lt;tt&gt;obj-&amp;gt;opo_stale = 1;&lt;/tt&gt; only in &lt;tt&gt;osp_invalidate()&lt;/tt&gt;...&lt;/p&gt;

&lt;p&gt;Because it&apos;s not impacting production, but just new group creation, we won&apos;t failover the MDT today (new groups can wait a bit &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;). We have some interactive jobs running. But I&apos;ll try to find a good time during the weekend to do so. Let me know if you want me to grab more debug info before then.&lt;/p&gt;</comment>
                            <comment id="246106" author="laisiyao" created="Sat, 20 Apr 2019 04:33:25 +0000"  >&lt;p&gt;This looks to be the same issue which was fixed by &lt;a href=&quot;https://review.whamcloud.com/#/c/33401/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/33401/&lt;/a&gt;, can you apply this patch on all MDS&apos;s and try again?&lt;/p&gt;</comment>
                            <comment id="246107" author="sthiell" created="Sat, 20 Apr 2019 14:01:12 +0000"  >&lt;p&gt;Hi Lai,&lt;/p&gt;

&lt;p&gt;We restarted the servers with the patch this morning and the problem is now gone. Thanks!&lt;/p&gt;</comment>
                            <comment id="246108" author="pjones" created="Sat, 20 Apr 2019 14:04:15 +0000"  >&lt;p&gt;Nice! Thanks all. &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=sthiell&quot; class=&quot;user-hover&quot; rel=&quot;sthiell&quot;&gt;sthiell&lt;/a&gt; note that this fix is included in the upcoming 2.12.1&lt;/p&gt;</comment>
                            <comment id="246111" author="sthiell" created="Sat, 20 Apr 2019 16:54:02 +0000"  >&lt;p&gt;Peter, this patch (&lt;a href=&quot;https://review.whamcloud.com/#/c/33401/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/33401/&lt;/a&gt;&#160;-&#160;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11418&quot; title=&quot;hung threads on MDT and MDT won&amp;#39;t umount&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11418&quot;&gt;&lt;del&gt;LU-11418&lt;/del&gt;&lt;/a&gt; llog: refresh remote llog upon -ESTALE) is already available in 2.12.0:&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;commit 71f409c9b31b90fa432f1f46ad4e612fb65c7fcc
Author: Lai Siyao &amp;lt;lai.siyao@intel.com&amp;gt;
Date:   Wed Oct 17 13:29:53 2018 +0800

    LU-11418 llog: refresh remote llog upon -ESTALE
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;But it&apos;s not included in 2.10.7 (that we&apos;re running on our Oak servers).&lt;/p&gt;</comment>
                            <comment id="246130" author="pjones" created="Sun, 21 Apr 2019 13:38:17 +0000"  >&lt;p&gt;Stephane&lt;/p&gt;

&lt;p&gt;You are correct. Yet another illustration as to why it is confusing to have multiple patches tracked under the same Jira ticket spanning release boundaries&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="53392">LU-11418</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="32448" name="oak-md1-s1-MDT1.dk.gz" size="3092345" author="sthiell" created="Fri, 19 Apr 2019 16:53:14 +0000"/>
                            <attachment id="32449" name="oak-md1-s2-MDT0.dk.gz" size="10648116" author="sthiell" created="Fri, 19 Apr 2019 16:53:19 +0000"/>
                            <attachment id="32447" name="sh-101-60.dk.gz" size="371712" author="sthiell" created="Fri, 19 Apr 2019 16:53:08 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00f53:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>