<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:17:34 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8441] Text file busy error after overwriting file</title>
                <link>https://jira.whamcloud.com/browse/LU-8441</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt; Here&apos;s our reproducer:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;sh -c &lt;span class=&quot;code-quote&quot;&gt;&apos;cd /p/lscratchd/$USER &amp;amp;&amp;amp; (f=toss-3321; rm -f $f; cp /bin/ls $f; od -N1 $f; ./$f; echo &amp;gt; $f; rm -f $f)&apos;&lt;/span&gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This looks similar to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6232&quot; title=&quot;Text file busy error -- lustre 2.6.0 client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6232&quot;&gt;&lt;del&gt;LU-6232&lt;/del&gt;&lt;/a&gt;. This affects emacs which is impacting our users. This behaviour is a difference in how xemacs vs. vi deal with files that they already have open. With vi, it always writes to a temporary new file which it then moves over top of the file being edited. With xemacs, the original file is moved to &amp;lt;file&amp;gt;~ and a new file written on the first write. After that it overwrites the new file. One can see this by running them, saving a file, checking the inode number with &quot;ls -i &amp;lt;file&amp;gt;&quot;, and then repeating the save and check operations. With xemacs the inode number won&apos;t change for each save. With vi, it will.&lt;/p&gt;</description>
                <environment>lustre-2.5.5-6chaos_2.6.32_573.26.1.1chaos.ch5.4.x86_64.x86_64</environment>
        <key id="38429">LU-8441</key>
            <summary>Text file busy error after overwriting file</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="yujian">Jian Yu</assignee>
                                    <reporter username="kamakea1">Teresa Kamakea</reporter>
                        <labels>
                            <label>llnl</label>
                            <label>llnlfixready</label>
                    </labels>
                <created>Tue, 26 Jul 2016 23:13:27 +0000</created>
                <updated>Wed, 22 Nov 2017 01:55:50 +0000</updated>
                            <resolved>Wed, 22 Nov 2017 01:55:50 +0000</resolved>
                                    <version>Lustre 2.8.0</version>
                    <version>Lustre 2.5.5</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="160949" author="adilger" created="Fri, 5 Aug 2016 17:49:16 +0000"  >&lt;p&gt;I tried running the reproducer (slightly enhanced to show what it is doing) several times in a row and didn&apos;t see any problems (on 2.5.3.90 and 2.8.0 clients, 2.5.41.1 server), but I&apos;m not sure what it is supposed to show:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[mythtv@twoshoes ~]$ sh -c &apos;cd /myth/tmp/mythtv &amp;amp;&amp;amp; (f=toss-3321; rm -f $f; cp /bin/ls $f; od -N1 $f; ./$f -li; echo &amp;gt; $f; ls -li $f; rm -f $f)&apos;
0000000 000177
0000001
total 1
144116715221943228 -rwxr-xr-x 1 mythtv mythtv 114024 2016-08-05 11:41 toss-3321
144116715221943228 -rwxr-xr-x 1 mythtv mythtv 1 2016-08-05 11:41 toss-3321
[mythtv@twoshoes ~]$ sh -c &apos;cd /myth/tmp/mythtv &amp;amp;&amp;amp; (f=toss-3321; rm -f $f; cp /bin/ls $f; od -N1 $f; ./$f -li; echo &amp;gt; $f; ls -li $f; rm -f $f)&apos;
0000000 000177
0000001
total 1
144116715221943230 -rwxr-xr-x 1 mythtv mythtv 114024 2016-08-05 11:41 toss-3321
144116715221943230 -rwxr-xr-x 1 mythtv mythtv 1 2016-08-05 11:41 toss-3321
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Should this be reporting &quot;text file busy&quot; at the &quot;&lt;tt&gt;echo &amp;gt; $f&lt;/tt&gt;&quot; stage?&lt;/p&gt;</comment>
                            <comment id="160951" author="green" created="Fri, 5 Aug 2016 17:50:06 +0000"  >&lt;p&gt;Sounds like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4367&quot; title=&quot;unlink performance regression on lustre-2.5.52 client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4367&quot;&gt;&lt;del&gt;LU-4367&lt;/del&gt;&lt;/a&gt; that had a patch adopted ages ago, but that was not 100% correct and was recently correctly refixed by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8019&quot; title=&quot;Openlock breakage&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8019&quot;&gt;&lt;del&gt;LU-8019&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="160990" author="nedbass" created="Fri, 5 Aug 2016 21:12:57 +0000"  >&lt;p&gt;Andreas, yes the echo is what fails with ETXTBSY.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;$  bass6@cab690 ~ /dev/pts/49 Fri Aug 05 14:10:59  &amp;gt;
sh -x -c &lt;span class=&quot;code-quote&quot;&gt;&apos;cd /p/lscratchd/$USER &amp;amp;&amp;amp; (f=toss-3321; rm -f $f; cp /bin/ls $f; od -N1 $f; ./$f; echo &amp;gt; $f; rm -f $f)&apos;&lt;/span&gt;
+ cd /p/lscratchd/bass6
+ f=toss-3321
+ rm -f toss-3321
+ cp /bin/ls toss-3321
+ od -N1 toss-3321
0000000 000177
0000001
+ ./toss-3321
bin  tmp  toss-3321  y  z
+ echo
sh: toss-3321: Text file busy
+ rm -f toss-3321
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="161139" author="pjones" created="Mon, 8 Aug 2016 17:15:02 +0000"  >&lt;p&gt;Ned&lt;/p&gt;

&lt;p&gt;So it seems that this issue will not be present when your clients move to 2.8.x. Is this issue serious enough that you would consider upgrading your 2.5.x clients to a newer version or will you just wait to eradicate this when you upgrade to 2.8.x?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="161156" author="nedbass" created="Mon, 8 Aug 2016 17:54:26 +0000"  >&lt;p&gt;Peter, we do not plan to upgrade our clients to 2.8 until sometime in 2017. This bug impacts the workflow of at least one of our users. While not serious, bugs like this force our users to adopt workarounds that tend to propagate and long outlive the original problem. So it would be nice to not have to wait for 2.8 for a fix, and I would rank it as low priority.&lt;/p&gt;</comment>
                            <comment id="161159" author="pjones" created="Mon, 8 Aug 2016 17:59:30 +0000"  >&lt;p&gt;Sure Ned.&lt;/p&gt;

&lt;p&gt;Jian&lt;/p&gt;

&lt;p&gt;Can you please port the fix for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4367&quot; title=&quot;unlink performance regression on lustre-2.5.52 client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4367&quot;&gt;&lt;del&gt;LU-4367&lt;/del&gt;&lt;/a&gt; to the 2.5 FE branch&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="161199" author="nedbass" created="Mon, 8 Aug 2016 21:13:13 +0000"  >&lt;blockquote&gt;&lt;p&gt;Peter, we do not plan to upgrade our clients to 2.8 until sometime in 2017. &lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;To clarify, that comment applies only to our existing client clusters. We will be bringing some new client and server clusters online running Lustre 2.8 over the next few weeks and months.&lt;/p&gt;</comment>
                            <comment id="198699" author="nedbass" created="Thu, 8 Jun 2017 22:07:10 +0000"  >&lt;p&gt;Peter, this issue is still affecting our Lustre 2.8 clients.&lt;/p&gt;</comment>
                            <comment id="198717" author="yujian" created="Fri, 9 Jun 2017 06:02:39 +0000"  >&lt;p&gt;Hi Ned,&lt;/p&gt;

&lt;p&gt;I set up a test cluster with the latest 2.8 FE build on both clients and servers, tried to run the reproducer but didn&apos;t see the &quot;Text file busy&quot; issue:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[bass6@onyx-21vm5 ~]$ lctl get_param -n version
lustre: 2.8.0.51
kernel: patchless_client
build:  2.8.0.51-g9bda4c1-CHANGED-2.6.32-642.15.1.el6.x86_64
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[bass6@onyx-21vm5 ~]$ sh -x -c &apos;cd /mnt/lustre/$USER &amp;amp;&amp;amp; (f=toss-3321; rm -f $f; cp /bin/ls $f; od -N1 $f; ./$f; echo &amp;gt; $f; rm -f $f)&apos;
+ cd /mnt/lustre/bass6
+ f=toss-3321
+ rm -f toss-3321
+ cp /bin/ls toss-3321
+ od -N1 toss-3321
0000000 000177
0000001
+ ./toss-3321
toss-3321
+ echo
+ rm -f toss-3321
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="205397" author="yujian" created="Tue, 15 Aug 2017 04:53:50 +0000"  >&lt;p&gt;Hi Ned,&lt;/p&gt;

&lt;p&gt;Could you please reproduce the failure and gather Lustre debug logs as follows?&lt;br/&gt;
 1. On Client and MDS nodes, run the following commands to set full debug level:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lctl set_param debug=-1 debug_mb=1024
# lctl clear

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;2. On Client node, run the following command to reproduce the failure:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;#  sh -x -c &apos;cd /p/lscratchd/$USER &amp;amp;&amp;amp; (f=toss-3321; rm -f $f; cp /bin/ls $f; od -N1 $f; ./$f; echo &amp;gt; $f; rm -f $f)&apos;

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;3. On Client and MDS nodes, run the following command to gather debug logs:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lctl dk &amp;gt; /tmp/debug-$(hostname -s)-$(date +%s)

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;4. Upload the logs to this ticket for investigation.&lt;/p&gt;

&lt;p&gt;Thank you.&lt;/p&gt;</comment>
                            <comment id="205679" author="kamakea1" created="Thu, 17 Aug 2017 22:16:49 +0000"  >&lt;p&gt;Jian,&lt;/p&gt;

&lt;p&gt;I tar&apos;d up the log files and attached them. Let me know if you need anything else.&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;</comment>
                            <comment id="205709" author="yujian" created="Fri, 18 Aug 2017 00:18:53 +0000"  >&lt;p&gt;Thank you for the logs, Teresa. Let me look into them.&lt;/p&gt;</comment>
                            <comment id="205806" author="yujian" created="Fri, 18 Aug 2017 22:24:02 +0000"  >&lt;p&gt;Hi Teresa,&lt;/p&gt;

&lt;p&gt;&quot;Text file busy&quot; is error code &quot;-ETXTBSY&quot; (-26). In Client debug log, the error code was returned as follows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000080:00000001:0.0:1502922949.651746:0:179938:0:(namei.c:506:ll_lookup_it()) Process entered
00000080:00200000:0.0:1502922949.651746:0:179938:0:(namei.c:513:ll_lookup_it()) VFS Op:name=toss-3321,dir=144115205255725787/33554436(ffff8802c5555638),intent=open|creat
------8&amp;lt;------
00000002:00100000:0.0:1502922949.652793:0:179938:0:(mdc_locks.c:643:mdc_finish_enqueue()) @@@ op: 3 disposition: b, status: -26  req@ffff88031c0cf800 x1570675218372588/t0(0) o101-&amp;gt;lsd-MDT0000-mdc-ffff88041e231000@172.19.2.102@o2ib100:12/10 lens 2464/600 e 0 to 0 dl 1502923055 ref 1 fl Complete:R/0/0 rc 301/301
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The error status -26 was returned from MDS, while it seems the MDS debug log is inadequate because there is no -26 error code in the log. By looking into Lustre codes, I found &quot;-ETXTBSY&quot; was returned from mdt_write_get() and mdt_write_deny() in lustre/mdt/mdt_open.c. And both of the two functions were called from mdt_mfd_open(). It seems there is a race between the execution and write of the file toss-3321.&lt;/p&gt;

&lt;p&gt;Could you please try to add a &quot;sync&quot; between &quot;./$f;&quot; and &quot;echo &amp;gt; $f;&quot; in the command line to see if the error still occurs? If it&apos;s still reproducible, could you please gather the Lustre debug log on MDS again? Right before running the &quot;sh -x -c ...&quot; command, please run &quot;lctl dk &amp;gt;/dev/null&quot; on both Client and MDS to clear the debug buffer so that we can get the exact debug logs.&lt;/p&gt;

&lt;p&gt;Thank you.&lt;/p&gt;</comment>
                            <comment id="205816" author="yujian" created="Sat, 19 Aug 2017 01:30:14 +0000"  >&lt;p&gt;Hi Teresa,&lt;/p&gt;

&lt;p&gt;A similar issue was reported in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7727&quot; title=&quot;open with FMODE_EXEC fails with ETXTBSY after a failed FMODE_WRITE open attempt on a read only client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7727&quot;&gt;&lt;del&gt;LU-7727&lt;/del&gt;&lt;/a&gt;, which was fixed in Lustre 2.9.0. I back-ported the patch to the Lustre b2_8_fe branch in &lt;a href=&quot;https://review.whamcloud.com/28606&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28606&lt;/a&gt;. Could you please try it? Thank you.&lt;/p&gt;</comment>
                            <comment id="206367" author="kamakea1" created="Thu, 24 Aug 2017 23:27:09 +0000"  >&lt;p&gt;Thank you Jian. I&apos;ll check in with Ned on trying the back-ported patch.&lt;/p&gt;</comment>
                            <comment id="206368" author="yujian" created="Thu, 24 Aug 2017 23:40:23 +0000"  >&lt;p&gt;You&apos;re welcome, Teresa.&lt;/p&gt;</comment>
                            <comment id="208802" author="nedbass" created="Tue, 19 Sep 2017 20:03:46 +0000"  >&lt;p&gt;Hi Jian Yu, while we haven&apos;t tested your backported patch, &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7727&quot; title=&quot;open with FMODE_EXEC fails with ETXTBSY after a failed FMODE_WRITE open attempt on a read only client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7727&quot;&gt;&lt;del&gt;LU-7727&lt;/del&gt;&lt;/a&gt; and patch 28606 relate specifically to read-only clients. This issue does not involve read-only clients, so I suspect it is not actually fixed.&lt;/p&gt;</comment>
                            <comment id="209183" author="green" created="Fri, 22 Sep 2017 03:56:59 +0000"  >&lt;p&gt;Hm, it&apos;s weird that the ticket is marked as resolved while apparently the problem is still present?&lt;/p&gt;

&lt;p&gt;Anyway I was looking into the client log provided to see if potentially &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8371&quot; title=&quot;Optimize open of known negative dentry&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8371&quot;&gt;&lt;del&gt;LU-8371&lt;/del&gt;&lt;/a&gt; might be useful here and noticed that the version you use does not match anything I have.&lt;br/&gt;
I know LLNL used to host their Lustre repo on github, but that seems to have ended at 2.5 and I cannot find the new location.&lt;/p&gt;
</comment>
                            <comment id="209197" author="pjones" created="Fri, 22 Sep 2017 04:48:35 +0000"  >&lt;p&gt;Oleg&lt;/p&gt;

&lt;p&gt;We host the LLNL tree now - fs/lustre-release-fe-llnl.git&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="209202" author="green" created="Fri, 22 Sep 2017 04:59:13 +0000"  >&lt;p&gt;so I pulled that repo and tested 2.8.0_12.chaos tag and llnl/2.8.0-llnl branch (both appear to point at the same point) and still things don&apos;t match.&lt;/p&gt;

&lt;p&gt;In particular in file.c:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000080:00000001:20.0:1502922949.651650:0:179942:0:(file.c:331:ll_md_close()) Process leaving (rc=0 : 0 : 0)
00000080:00000001:20.0:1502922949.651651:0:179942:0:(file.c:392:ll_file_release()) Process leaving (rc=0 : 0 : 0)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;But:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#ifdef CONFIG_FS_POSIX_ACL
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (sbi-&amp;gt;ll_flags &amp;amp; LL_SBI_RMT_CLIENT &amp;amp;&amp;amp;
            inode == inode-&amp;gt;i_sb-&amp;gt;s_root-&amp;gt;d_inode) {
                struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

                LASSERT(fd != NULL);
331&amp;gt;&amp;gt;&amp;gt;   &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (unlikely(fd-&amp;gt;fd_flags &amp;amp; LL_FILE_RMTACL)) {
                        fd-&amp;gt;fd_flags &amp;amp;= ~LL_FILE_RMTACL;
                        rct_del(&amp;amp;sbi-&amp;gt;ll_rct, current_pid());
                        et_search_free(&amp;amp;sbi-&amp;gt;ll_et, current_pid());
                }
        }
#endif
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        }
392&amp;gt;&amp;gt;&amp;gt;&amp;gt;
        op_data = ll_prep_md_op_data(NULL, parent-&amp;gt;d_inode, de-&amp;gt;d_inode,
                                     name, len, 0, LUSTRE_OPC_ANY, NULL);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="210893" author="ofaaland" created="Wed, 11 Oct 2017 20:59:46 +0000"  >&lt;p&gt;Hi Oleg,&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;I pulled that repo and tested 2.8.0_12.chaos tag and llnl/2.8.0-llnl branch (both appear to point at the same point) and still things don&apos;t match.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;What two things are you comparing - what doesn&apos;t match what?&lt;br/&gt;
Thanks.&lt;/p&gt;</comment>
                            <comment id="210946" author="kamakea1" created="Thu, 12 Oct 2017 15:12:22 +0000"  >&lt;p&gt;The version on the client when the debug log was generated: Lustre: Lustre: Build Version: -13chaos-CHANGED-2.6.32-696.3.1.1chaos.ch5.6.x86_64&lt;/p&gt;

&lt;p&gt;The version on the MDS when the debug log was generated: Lustre: Lustre: Build Version: -13chaos-CHANGED-2.6.32-696.3.1.1chaos.ch5.6.x86_64&lt;/p&gt;</comment>
                            <comment id="210977" author="ofaaland" created="Thu, 12 Oct 2017 18:10:40 +0000"  >&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Oleg,&lt;/p&gt;

&lt;p&gt;Sorry for the confusion.&#160; Those look like Lustre 2.5 builds (I believe all our 2.8 builds are based on weak modules).&#160; I&apos;ll huddle with Teresa and get back to you.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="211233" author="green" created="Mon, 16 Oct 2017 20:21:09 +0000"  >&lt;p&gt;Olaf: the code does not match the logs.&lt;br/&gt;
Basically in those two lines of log above we can see that file.c lines 331 and 392 should be either GOTO, EXIT or RETURN. Additionally the function name should be as specified.&lt;/p&gt;

&lt;p&gt;I think the whole idea of reopening this ticket was due to 2.8 still exhibiting the ETXTBSY issue? If that is so, please provide 2.8 logs with the problem.&lt;/p&gt;</comment>
                            <comment id="211237" author="ofaaland" created="Mon, 16 Oct 2017 20:48:57 +0000"  >&lt;p&gt;Oleg,&lt;/p&gt;

&lt;p&gt;Teresa will test with 2.8 and post here.&lt;/p&gt;</comment>
                            <comment id="213714" author="kamakea1" created="Tue, 14 Nov 2017 22:45:32 +0000"  >&lt;p&gt;I have attached the debug logs. There are 3 different scenarios: lustre 2.8 client/lustre2.8 server, lustre 2.5 client/lustre2.5 server, and lustre2.8 client/lustre2.5 server. The text file busy error shows up in the 2.5/2.5 scenario but not the others. The specific version information is included in the log files.&lt;/p&gt;</comment>
                            <comment id="213859" author="ofaaland" created="Wed, 15 Nov 2017 23:56:17 +0000"  >&lt;p&gt;Ned was mistaken when he stated we still see the problem on Lustre 2.8.&#160; The cluster he tested on, and that the end-user reported the issue on, had been accidentally switched to the Lustre 2.5 client.&lt;/p&gt;

&lt;p&gt;We see the problem only in the 2.5 client.&lt;/p&gt;</comment>
                            <comment id="214154" author="pjones" created="Mon, 20 Nov 2017 17:08:57 +0000"  >&lt;p&gt;Olaf&lt;/p&gt;

&lt;p&gt;So am I correct in thinking that this means that the original theory that this is a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7727&quot; title=&quot;open with FMODE_EXEC fails with ETXTBSY after a failed FMODE_WRITE open attempt on a read only client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7727&quot;&gt;&lt;del&gt;LU-7727&lt;/del&gt;&lt;/a&gt; holds true and that you would no longer expect to see this either when you apply the fix to your 2.5.x distribution or else upgrade to your 2.8.x distribution?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="214160" author="ofaaland" created="Mon, 20 Nov 2017 17:33:38 +0000"  >&lt;blockquote&gt;
&lt;p&gt;So am I correct in thinking that this means that the original theory that this is a duplicate of &lt;del&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7727&quot; title=&quot;open with FMODE_EXEC fails with ETXTBSY after a failed FMODE_WRITE open attempt on a read only client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7727&quot;&gt;&lt;del&gt;LU-7727&lt;/del&gt;&lt;/a&gt;&lt;/del&gt; holds true and that you would no longer expect to see this either when you apply the fix to your 2.5.x distribution&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;No, the patch for &lt;del&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7727&quot; title=&quot;open with FMODE_EXEC fails with ETXTBSY after a failed FMODE_WRITE open attempt on a read only client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7727&quot;&gt;&lt;del&gt;LU-7727&lt;/del&gt;&lt;/a&gt;&lt;/del&gt; is very specific to a read-only mount.&#160; The patch says, in effect, &quot;If the mount is read-only, and the file is being opened with the O_WRITE flag, then fail immediately instead of sending a request to the MDT&quot;.&lt;/p&gt;

&lt;p&gt;Without that patch, a request is sent to the MDT which then causes side effects on other clients.&lt;/p&gt;

&lt;p&gt;It has a similar change on the MDT, for the case where an MDT receives an open request from a client which has mounted read-only.&lt;/p&gt;

&lt;p&gt;In our case, no clients are mounted read-only, so neither of those changes would have any effect.&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;or else upgrade to your 2.8.x distribution?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Correct, when all our systems are at 2.8.x we do not expect to see this.&lt;/p&gt;

&lt;p&gt;Since the issue does not occur in 2.8.x, it may well be that there is an existing patch that could be backported, but &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7727&quot; title=&quot;open with FMODE_EXEC fails with ETXTBSY after a failed FMODE_WRITE open attempt on a read only client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7727&quot;&gt;&lt;del&gt;LU-7727&lt;/del&gt;&lt;/a&gt; is not it.&lt;/p&gt;</comment>
                            <comment id="214167" author="pjones" created="Mon, 20 Nov 2017 18:49:22 +0000"  >&lt;p&gt;Ah yes. I did not read far enough back in the comments. So, &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=yujian&quot; class=&quot;user-hover&quot; rel=&quot;yujian&quot;&gt;yujian&lt;/a&gt; I think that the situation is now that this is believed to be a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4367&quot; title=&quot;unlink performance regression on lustre-2.5.52 client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4367&quot;&gt;&lt;del&gt;LU-4367&lt;/del&gt;&lt;/a&gt; but that change is not a simple back port to a 2.5.x branch and so the question is - is this an impactful enough issue to warrant taking on the risk of introducing a largish change that has not been proven in production environments elsewhere?&lt;/p&gt;</comment>
                            <comment id="214337" author="green" created="Tue, 21 Nov 2017 17:47:01 +0000"  >&lt;p&gt;It looks like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8109&quot; title=&quot;Separated MGS/MDS: conf-sanity test_32a: Unable to mount  (-114)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8109&quot;&gt;&lt;del&gt;LU-8109&lt;/del&gt;&lt;/a&gt; and all the preceding patches including &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3544&quot; title=&quot;Writing to new files under NFS export from Lustre will result in ENOENT (SLES11SP2)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3544&quot;&gt;&lt;del&gt;LU-3544&lt;/del&gt;&lt;/a&gt; are necessary to fix this, but the exact series is quite long and not exactly known at this time. What is known is that there were multiple problems introduced and fixed along the way until finally ironing out all wrinkles here.&lt;/p&gt;</comment>
                            <comment id="214340" author="yujian" created="Tue, 21 Nov 2017 17:54:46 +0000"  >&lt;p&gt;Hi Olaf,&lt;br/&gt;
The fix is &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8019&quot; title=&quot;Openlock breakage&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8019&quot;&gt;&lt;del&gt;LU-8019&lt;/del&gt;&lt;/a&gt; and the prior patches. While trying to back-port the patch, I found it had a long dependency chain, including those for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3544&quot; title=&quot;Writing to new files under NFS export from Lustre will result in ENOENT (SLES11SP2)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3544&quot;&gt;&lt;del&gt;LU-3544&lt;/del&gt;&lt;/a&gt;, which contains more patches.&lt;/p&gt;</comment>
                            <comment id="214342" author="ofaaland" created="Tue, 21 Nov 2017 18:10:39 +0000"  >&lt;p&gt;Hi Jian and Oleg,&lt;/p&gt;

&lt;p&gt;Thank you for investigating.  Given the complexity and risk we can close this notfix and we will do the same in our local ticket.&lt;/p&gt;

&lt;p&gt;For my education, can you tell me where the ETXTBSY is coming from in our broken case, and describe some of the factors that lead to this?  It need not be a complete and perfect description, just some hints that help understand the relevant code paths.&lt;/p&gt;</comment>
                            <comment id="214363" author="yujian" created="Tue, 21 Nov 2017 19:21:27 +0000"  >&lt;p&gt;Hi Olaf,&lt;br/&gt;
According to debug logs, there were some analyses in the previous comment &lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-8441?focusedCommentId=205806&amp;amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-205806&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-8441?focusedCommentId=205806&amp;amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-205806&lt;/a&gt; about where the ETXTBSY is coming from.&lt;/p&gt;

&lt;p&gt;Hi Oleg,&lt;br/&gt;
About the factors that lead to the issue, could you please give some hints? Thank you. &lt;/p&gt;</comment>
                            <comment id="214376" author="green" created="Wed, 22 Nov 2017 00:16:27 +0000"  >&lt;p&gt;This is mostly due to lingering file opens for write that got cached on the client. So when the exec comes it sees the file is opened for write and bails out (server side). We tried to just obtain a necessary ldlm lock before opening, but that proved to be very expensive.&lt;/p&gt;

&lt;p&gt;The cached open on the other hand is the real problem here, originally mostly aimed at nfs opened files, it managed to be enabled for other types of opens at times leading to such problems.&lt;/p&gt;</comment>
                            <comment id="214377" author="ofaaland" created="Wed, 22 Nov 2017 00:38:03 +0000"  >&lt;p&gt;Thanks, Jian and Oleg.&lt;/p&gt;

&lt;p&gt;That&apos;s all LLNL needs for this.  You can close notfix (or whatever your normal process is).&lt;/p&gt;</comment>
                            <comment id="214381" author="yujian" created="Wed, 22 Nov 2017 01:55:50 +0000"  >&lt;p&gt;Thank you, Olaf. I&apos;m closing this ticket as a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8019&quot; title=&quot;Openlock breakage&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8019&quot;&gt;&lt;del&gt;LU-8019&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="36132">LU-8019</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="34381">LU-7727</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="28017" name="debug.lu8441.tar" size="12267520" author="kamakea1" created="Thu, 17 Aug 2017 22:12:39 +0000"/>
                            <attachment id="28721" name="lu8441.logs.tar" size="56101534" author="kamakea1" created="Tue, 14 Nov 2017 22:42:25 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzyiof:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>