<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:23:52 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2277] Text file busy</title>
                <link>https://jira.whamcloud.com/browse/LU-2277</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;ol&gt;
	&lt;li&gt;ldd a.out&lt;br/&gt;
/usr/bin/ldd: line 118: ./a.out: Text file busy&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;see attached client and server debug traces.&lt;/p&gt;</description>
                <environment>server-2.1.3-1nas (centos6.3)&lt;br/&gt;
client 2.1.3-1nas  Suse11-1</environment>
        <key id="16573">LU-2277</key>
            <summary>Text file busy</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="utopiabound">Nathaniel Clark</assignee>
                                    <reporter username="mhanafi">Mahmoud Hanafi</reporter>
                        <labels>
                    </labels>
                <created>Mon, 5 Nov 2012 13:05:40 +0000</created>
                <updated>Mon, 17 Mar 2014 17:51:25 +0000</updated>
                            <resolved>Mon, 11 Mar 2013 14:16:23 +0000</resolved>
                                    <version>Lustre 2.1.3</version>
                                    <fixVersion>Lustre 2.1.5</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="47404" author="green" created="Mon, 5 Nov 2012 13:10:05 +0000"  >&lt;p&gt;This is supposed to happen when somebody holds a file opened for write.&lt;/p&gt;

&lt;p&gt;How was a.out produced? Was somebody holding it open still?&lt;/p&gt;</comment>
                            <comment id="47469" author="mhanafi" created="Tue, 6 Nov 2012 14:54:43 +0000"  >&lt;p&gt;a.out was compiled in place. It seems that the error only occurs on some of the clients. &lt;/p&gt;</comment>
                            <comment id="47498" author="green" created="Tue, 6 Nov 2012 21:51:42 +0000"  >&lt;p&gt;Is there any NFS reexport going on by any chance?&lt;br/&gt;
Was the file created via NFS in particular? Are the clients where you are able to execute it on NFS, and the ones that fail on Lustre?&lt;/p&gt;

&lt;p&gt;I also see that you have standard debug enabled, if you can still reproduce this and there is no NFS involved, please increase debug level on your mds as follows:&lt;br/&gt;
cat /proc/sys/lnet/debug&lt;br/&gt;
remember the value it gave you&lt;/p&gt;

&lt;p&gt;try to minimize all other activity going on on the cluster&lt;/p&gt;

&lt;p&gt;echo -1 &amp;gt;/proc/sys/lnet/debug ; echo - trace &amp;gt;/proc/sys/lnet/debug&lt;/p&gt;

&lt;p&gt;do the reproducer&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;important to run the lctl below as quickly as possible after the reproducer&lt;br/&gt;
lctl dk &amp;gt;/tmp/lustre/log &lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;echo &quot;YOUR SAVED VALUE FROM ABOVE&quot; &amp;gt;/proc/sys/lnet/debug&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;</comment>
                            <comment id="47537" author="mhanafi" created="Wed, 7 Nov 2012 13:55:39 +0000"  >&lt;p&gt;The Lustre filesystem is not reexported via nfs. There are NFS mounts on the client for home dir, etc.&lt;/p&gt;

&lt;p&gt;I was able to track this issue to the stripe width of the directory. If the stripe width is &amp;gt; 54 it will reproduce the error.&lt;/p&gt;

&lt;p&gt;how it was reproduced.&lt;br/&gt;
mhanafi@pfe1:/nobackupp5/mhanafi/60&amp;gt; cat test.c++&lt;br/&gt;
#include &amp;lt;stdio.h&amp;gt;&lt;br/&gt;
int&lt;br/&gt;
main(int argc, char *argv[])&lt;br/&gt;
{&lt;br/&gt;
    return 0;&lt;br/&gt;
}&lt;/p&gt;

&lt;p&gt;mhanafi@pfe1:/nobackupp5/mhanafi/60&amp;gt; icpc -O test.c++&lt;br/&gt;
mhanafi@pfe1:/nobackupp5/mhanafi/60&amp;gt; ldd a.out&lt;br/&gt;
/usr/bin/ldd: line 118: ./a.out: Text file busy&lt;/p&gt;

&lt;p&gt;see attached debug logs.&lt;/p&gt;</comment>
                            <comment id="48914" author="utopiabound" created="Fri, 7 Dec 2012 12:50:17 +0000"  >&lt;p&gt;Have reproduction case:&lt;br/&gt;
1 Client - CentOS 6.3 - Lustre 2.1.3-2.6.32_279.2.1.el6.x86_64&lt;br/&gt;
           gcc version 4.4.6 20120305 (Red Hat 4.4.6-4) (GCC)&lt;br/&gt;
1 MDS - CentOS 6.3 - Lustre 2.1.3-2.6.32_279.2.1.el6_lustre.gc46c389.x86_64 (1 mds/mgs partition)&lt;br/&gt;
1 OSS - CentOS 6.3 - Lustre 2.1.3-2.6.32_279.2.1.el6_lustre.gc46c389.x86_64 (60 osts on loop devices)&lt;/p&gt;

&lt;p&gt;All TCP interconnect&lt;br/&gt;
All Server partitions created &amp;amp; mounted with standard autotest tools (auster -c PATH/TO/custom.sh sanity.sh --only MOUNT)&lt;/p&gt;

&lt;p&gt;In mounted FS:&lt;br/&gt;
cd /mnt/lustre&lt;br/&gt;
mkdir 58&lt;br/&gt;
lfs setstripe -c 58 58&lt;br/&gt;
cd 58&lt;br/&gt;
cat &amp;lt;&amp;lt; EOF &amp;gt; test.c&lt;br/&gt;
#include &amp;lt;stdio.h&amp;gt;&lt;br/&gt;
int&lt;br/&gt;
main(int argc, char *argv[])&lt;br/&gt;
{&lt;br/&gt;
return 0;&lt;br/&gt;
}&lt;br/&gt;
EOF&lt;br/&gt;
gcc test.c&lt;br/&gt;
./a.out&lt;/p&gt;

&lt;p&gt;Expected Results:&lt;br/&gt;
Should run w/o error.  For directories with a stripe width less than 54, there is no error.&lt;/p&gt;

&lt;p&gt;Actual Results:&lt;br/&gt;
For stripe widths of 54 and larger (at least up to 60) the following error results:&lt;br/&gt;
./a.out: Text file busy&lt;/p&gt;

&lt;p&gt;Other Test Results:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;If the filesystem is remounted then all a.out&apos;s will run correctly.&lt;/li&gt;
	&lt;li&gt;If any a.out is copied (either w/in or between directories, regardless of stripe width) it will run fine&lt;/li&gt;
	&lt;li&gt;If &quot;bad&quot; a.out is moved it will still elicit the same error&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;With 2 Clients (both CentOS 6.3 - Lustre 2.1.3-2.6.32_279.2.1.el6.x86_64)&lt;br/&gt;
1) First client generates &quot;bad&quot; a.out&lt;br/&gt;
2) Second client mounts FS&lt;br/&gt;
3) 2nd client gets error running a.out&lt;br/&gt;
4) First client unmounts FS&lt;br/&gt;
5) 2nd Client no longer gets error running a.out&lt;/p&gt;

&lt;p&gt;WAG of cause:&lt;br/&gt;
Client 1 when compiling test.c somehow continues to have a writecount ref (mot_writecount &amp;gt; 0) after compilation finishes.  This ref is cleaned up when the client fully disconnects.&lt;/p&gt;</comment>
                            <comment id="48998" author="utopiabound" created="Mon, 10 Dec 2012 13:17:33 +0000"  >&lt;p&gt;Attached is C File which can recreate this bug in directories with a default stripe width greater than 53, by just doing a modified copy of an executable.&lt;/p&gt;

&lt;p&gt;What causes the executable to be in a bad state is the following order of events:&lt;br/&gt;
1. Create file (O_RDWR or O_WRONLY)&lt;br/&gt;
2. write to file&lt;br/&gt;
3. close file (up to here, everything is fine &amp;#8211; this is just a copy)&lt;br/&gt;
4. open file (O_RDWR or O_WRONLY) - O_RDONLY do not cause this issue&lt;br/&gt;
5. close file&lt;/p&gt;</comment>
                            <comment id="49206" author="utopiabound" created="Thu, 13 Dec 2012 16:13:21 +0000"  >&lt;p&gt;After more careful inspection of the logs, it appears that in the failing cases, the second open is being processed twice:&lt;/p&gt;

&lt;p&gt;These are the log lines that match the following grep expression: &apos;&amp;#40;&amp;#40;mdt_close(&amp;#124;mdt_mfd_open&amp;#41;.*leaving&amp;#124;incoming&amp;#41;&apos; from logs that are of a double open copy to a directory with a given stripe size:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;53&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff8800189bc850 x1421171244404975 msgsize 192&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff8800189bc050 x1421171249691684 msgsize 192&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880014590800 x1421167733816518 msgsize 544&lt;br/&gt;
(mdt_open.c:721:mdt_mfd_open()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880012109400 x1421167733816520 msgsize 360&lt;br/&gt;
(mdt_open.c:1645:mdt_close()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880015236050 x1421167733816521 msgsize 544&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880012043000 x1421167733816522 msgsize 544&lt;br/&gt;
(mdt_open.c:721:mdt_mfd_open()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff8800152ca050 x1421167733816523 msgsize 360&lt;br/&gt;
(mdt_open.c:1645:mdt_close()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff8800189bd850 x1421167733816525 msgsize 192&lt;br/&gt;
54&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff8800189bd050 x1421167797775378 msgsize 192&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880012109400 x1421167797775379 msgsize 192&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff8800189be850 x1421171244404976 msgsize 192&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880014590800 x1421167733816584 msgsize 544&lt;br/&gt;
(mdt_open.c:721:mdt_mfd_open()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff8800148c9800 x1421167733816584 msgsize 544&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff88001458dc00 x1421167733816586 msgsize 360&lt;br/&gt;
(mdt_open.c:1645:mdt_close()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880015238850 x1421167733816587 msgsize 544&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880014689800 x1421167733816588 msgsize 544&lt;br/&gt;
(mdt_open.c:721:mdt_mfd_open()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880012109400 x1421167733816588 msgsize 544&lt;br/&gt;
(mdt_open.c:721:mdt_mfd_open()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
(events.c:284:request_in_callback()) incoming req@ffff880014590800 x1421167733816589 msgsize 360&lt;br/&gt;
(mdt_open.c:1645:mdt_close()) Process leaving (rc=0 : 0 : 0)&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Notice the repeated x1421167733816588 in the 54 stripe case with accompanying mdt_mfd_open but missing mdt_close().  mdt_close is skipped.  This means that a request is being processed twice.&lt;/p&gt;</comment>
                            <comment id="49314" author="utopiabound" created="Mon, 17 Dec 2012 11:51:55 +0000"  >&lt;p&gt;Note on what goes over the wire:&lt;br/&gt;
Items listed are wireshark dump of traffic, all items are requests client -&amp;gt; mds, all items are replied to prior to the next item being sent.&lt;/p&gt;

&lt;p&gt;For stripe sizes 50-53: (example is stripe 50)&lt;br/&gt;
1) LDLM_ENQUEUE request &lt;span class=&quot;error&quot;&gt;&amp;#91;Concurrent Write&amp;#93;&lt;/span&gt;[ intent : open create ] filename : a2&lt;br/&gt;
2) MDS_CLOSE &lt;span class=&quot;error&quot;&gt;&amp;#91;REINT_SETATTR&amp;#93;&lt;/span&gt;&lt;br/&gt;
3) LDLM_ENQUEUE request &lt;span class=&quot;error&quot;&gt;&amp;#91;Concurrent Read&amp;#93;&lt;/span&gt;[ intent : lookup ] filename : 50&lt;br/&gt;
4) LDLM_ENQUEUE request &lt;span class=&quot;error&quot;&gt;&amp;#91;Concurrent Read&amp;#93;&lt;/span&gt;[ intent : open ]&lt;span class=&quot;error&quot;&gt;&amp;#91;REINT_OPEN&amp;#93;&lt;/span&gt; filename : a2&lt;br/&gt;
5) MDS_CLOSE &lt;span class=&quot;error&quot;&gt;&amp;#91;REINT_SETATTR&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;For stripe sizes 54-59: (example is stripe 54)&lt;br/&gt;
1) LDLM_ENQUEUE request &lt;span class=&quot;error&quot;&gt;&amp;#91;Concurrent Write&amp;#93;&lt;/span&gt;[ intent : open create ] filename : a2&lt;br/&gt;
2) LDLM_ENQUEUE request &lt;span class=&quot;error&quot;&gt;&amp;#91;Concurrent Write&amp;#93;&lt;/span&gt;[ intent : open create ] filename : a2&lt;br/&gt;
3) MDS_CLOSE &lt;span class=&quot;error&quot;&gt;&amp;#91;REINT_SETATTR&amp;#93;&lt;/span&gt;&lt;br/&gt;
4) LDLM_ENQUEUE request &lt;span class=&quot;error&quot;&gt;&amp;#91;Concurrent Read&amp;#93;&lt;/span&gt;[ intent : lookup ] filename : 54&lt;br/&gt;
5) LDLM_ENQUEUE request &lt;span class=&quot;error&quot;&gt;&amp;#91;Concurrent Read&amp;#93;&lt;/span&gt;[ intent : open ] filename : a2&lt;br/&gt;
6) LDLM_ENQUEUE request &lt;span class=&quot;error&quot;&gt;&amp;#91;Concurrent Read&amp;#93;&lt;/span&gt;[ intent : open ] filename : a2&lt;br/&gt;
7) MDS_CLOSE &lt;span class=&quot;error&quot;&gt;&amp;#91;REINT_SETATTR&amp;#93;&lt;/span&gt;&lt;/p&gt;</comment>
                            <comment id="49333" author="utopiabound" created="Mon, 17 Dec 2012 16:52:51 +0000"  >&lt;p&gt;Client seems to be re-sending open create because response data is large and reply_in_callback() registers it as truncated and resends.  So the solution I&apos;m going to look to implement is to correctly handle the replay of the open/create for write/exec in mdt_mfd_open().&lt;/p&gt;</comment>
                            <comment id="49381" author="utopiabound" created="Tue, 18 Dec 2012 10:08:01 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/4848&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4848&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="50371" author="utopiabound" created="Fri, 11 Jan 2013 16:44:52 +0000"  >&lt;p&gt;Original Commit is now just testing updates:&lt;/p&gt;

&lt;p&gt;Following are cherry-picked commits that are dependent on 4848, 5002 is unchanged, 5003 needed a merge conflict resolved.&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/5002&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5002&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/5003&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5003&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="53714" author="utopiabound" created="Mon, 11 Mar 2013 14:16:23 +0000"  >&lt;p&gt;Patches picked to branch&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="15052">LU-1571</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="12836">LU-971</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="12025" name="client.inode.dlmtrace.debug" size="255" author="mhanafi" created="Mon, 5 Nov 2012 13:05:40 +0000"/>
                            <attachment id="12032" name="debug.duringcompile.gz" size="239" author="mhanafi" created="Wed, 7 Nov 2012 13:55:32 +0000"/>
                            <attachment id="12033" name="debug.duringldd.gz" size="231" author="mhanafi" created="Wed, 7 Nov 2012 13:55:32 +0000"/>
                            <attachment id="12024" name="mds.inode.dlmtrace.debug" size="783222" author="mhanafi" created="Mon, 5 Nov 2012 13:05:40 +0000"/>
                            <attachment id="12087" name="simple-2277.c" size="1437" author="utopiabound" created="Mon, 10 Dec 2012 13:17:33 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvbpb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5443</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>