<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:02:46 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-3] Missing concurrent access control between statahead and VFS create/unlink operation caused dcache confused</title>
                <link>https://jira.whamcloud.com/browse/LU-3</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;As originally reported on Lustre Bugzilla:&lt;br/&gt;
&lt;a href=&quot;https://bugzilla.lustre.org/show_bug.cgi?id=15962&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://bugzilla.lustre.org/show_bug.cgi?id=15962&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;This issue will cause LBUG on client-side as following:&lt;/p&gt;

&lt;p&gt;=====================&lt;br/&gt;
LustreError: 10521:0:(namei.c:826:ll_create_node()) ASSERTION(list_empty(&amp;amp;inode-&amp;gt;i_dentry)) failed^M&lt;br/&gt;
LustreError: 10521:0:(namei.c:826:ll_create_node()) LBUG^M&lt;br/&gt;
Pid: 10521, comm: dir_create.sh^M&lt;br/&gt;
^M&lt;br/&gt;
Call Trace:^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000f8e00640&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x50/0x70 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000f8e00d9d&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x6d/0xd0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000f8e0c1f6&amp;gt;&amp;#93;&lt;/span&gt; libcfs_assertion_failed+0x66/0x70 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000f927c762&amp;gt;&amp;#93;&lt;/span&gt; ll_create_nd+0xb62/0xbd0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000c0482b33&amp;gt;&amp;#93;&lt;/span&gt; vfs_create+0xc8/0x12f^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000c04854e4&amp;gt;&amp;#93;&lt;/span&gt; open_namei+0x16a/0x5f9^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000c0474b02&amp;gt;&amp;#93;&lt;/span&gt; do_filp_open+0x1c/0x31^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000c0474b55&amp;gt;&amp;#93;&lt;/span&gt; do_sys_open+0x3e/0xae^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000c0474bf2&amp;gt;&amp;#93;&lt;/span&gt; sys_open+0x16/0x18^M&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;00000000c0404f17&amp;gt;&amp;#93;&lt;/span&gt; syscall_call+0x7/0xb^M&lt;br/&gt;
=====================&lt;/p&gt;

&lt;p&gt;It has been reported many times on lustre-1.8 in the racer test. It can be reproduced locally by running the racer test repeatedly.&lt;/p&gt;</description>
                <environment></environment>
        <key id="10030">LU-3</key>
            <summary>Missing concurrent access control between statahead and VFS create/unlink operation caused dcache confused</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="yong.fan">nasf</assignee>
                                    <reporter username="yong.fan">nasf</reporter>
                        <labels>
                    </labels>
                <created>Thu, 16 Sep 2010 00:56:24 +0000</created>
                <updated>Sat, 8 Oct 2011 04:12:34 +0000</updated>
                            <resolved>Sat, 8 Oct 2011 04:12:34 +0000</resolved>
                                    <version>Lustre 2.1.0</version>
                    <version>Lustre 1.8.6</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="10020" author="yong.fan" created="Thu, 16 Sep 2010 01:14:55 +0000"  >&lt;p&gt;This is a mixed patch containing:&lt;/p&gt;

&lt;p&gt;1) introduce rw_semaphore &quot;lli_sa_rwsem&quot; for the synchronization control between statahead and VFS create operation.&lt;br/&gt;
2) introduce spin_lock &quot;lli_sa_lock&quot; (special) to replace &quot;lli_lock&quot; (shared) for statahead related critical protection.&lt;br/&gt;
3) fix the statahead-related LBUG reported on the lustre-discuss mailing list recently: &quot;(statahead.c:289:ll_sai_entry_fini()) LBUG&quot;.&lt;br/&gt;
4) drop unnecessary debug messages.&lt;br/&gt;
5) code cleanup.&lt;/p&gt;

&lt;p&gt;This patch will make the current statahead on lustre-1.8 work more smoothly.&lt;/p&gt;</comment>
                            <comment id="10055" author="yong.fan" created="Sun, 10 Oct 2010 20:40:45 +0000"  >&lt;p&gt;The patch has been attached to Lustre bugzilla bug 15962, to be inspected by Oracle&apos;s engineers.&lt;/p&gt;</comment>
                            <comment id="10415" author="kitwestneat" created="Tue, 11 Jan 2011 09:10:40 +0000"  >&lt;p&gt;Any progress on this? We have seen this happen at several sites and in fact have statahead disabled by default in our builds.&lt;/p&gt;</comment>
                            <comment id="10420" author="yong.fan" created="Wed, 12 Jan 2011 00:02:45 +0000"  >&lt;p&gt;The patch attached to bug 15962 is workable, but some inspectors do not like it because it is a hack, not only in the patch itself but also in the statahead implementation. So it may not be landed on the main branch. Currently, we are thinking about making a big change to the statahead implementation to avoid those hacks.&lt;/p&gt;

&lt;p&gt;So if you want, I think you can apply this patch as a temporary solution.&lt;/p&gt;</comment>
                            <comment id="10567" author="yong.fan" created="Wed, 9 Feb 2011 02:27:21 +0000"  >&lt;p&gt;new patch is in inspection:&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#change,2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="12461" author="sarah" created="Wed, 6 Apr 2011 22:10:24 +0000"  >&lt;p&gt;got this issue when running runracer on three clients:&lt;br/&gt;
build: lustre-master/rhel5-x86_64/#16&lt;br/&gt;
-------------------------------------------------------&lt;/p&gt;

&lt;p&gt;Apr  6 19:33:18 client-21 kernel: Lustre: DEBUG MARKER: == runracer test 1: racee&lt;br/&gt;
r on clients: client-5-ib,client-21-ib,client-22-ib DURATION=120 ============= 11&lt;br/&gt;
9:30:15 (1302143415)&lt;br/&gt;
Apr  6 19:33:18 client-21 xinetd&lt;span class=&quot;error&quot;&gt;&amp;#91;3131&amp;#93;&lt;/span&gt;: EXIT: shell status=0 pid=11966 duration==&lt;br/&gt;
0(sec)&lt;br/&gt;
Apr  6 19:33:18 client-21 xinetd&lt;span class=&quot;error&quot;&gt;&amp;#91;3131&amp;#93;&lt;/span&gt;: START: shell pid=11980 from=192.168.4.5&lt;br/&gt;
Apr  6 19:33:18 client-21 rshd&lt;span class=&quot;error&quot;&gt;&amp;#91;11981&amp;#93;&lt;/span&gt;: root@192.168.4.5 as root: cmd=&apos;(PATH=$PATT&lt;br/&gt;
H:/usr/lib64/lustre/utils:/usr/lib64/lustre/tests:/sbin:/usr/sbin; cd /usr/lib644&lt;br/&gt;
/lustre/tests; LUSTRE=&quot;/usr/lib64/lustre&quot;  sh -c &quot;DURATION=120 /usr/lib64/lustree&lt;br/&gt;
/tests/racer/racer.sh /mnt/lustre/racer &quot;);echo XXRETCODE:$?&apos;&lt;br/&gt;
Apr  6 19:33:34 client-21 kernel: LustreError: 11-0: an error occurred while comm&lt;br/&gt;
municating with 192.168.4.128@o2ib. The mds_getattr operation failed with -2&lt;br/&gt;
Apr  6 19:33:34 client-21 kernel: LustreError: Skipped 5 previous similar messagg&lt;br/&gt;
es&lt;br/&gt;
Apr  6 19:33:53 client-21 kernel: LustreError: 21677:0:(file.c:2151:ll_inode_revv&lt;br/&gt;
alidate_fini()) failure -2 inode 144115205339619325&lt;br/&gt;
Apr  6 19:34:08 client-21 kernel: LustreError: 1339:0:(file.c:2151:ll_inode_revaa&lt;br/&gt;
lidate_fini()) failure -116 inode 144115205322901335&lt;br/&gt;
Apr  6 19:34:19 client-21 kernel: LustreError: 10966:0:(file.c:2151:ll_inode_revv&lt;br/&gt;
alidate_fini()) failure -2 inode 144115205339624220&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel: LustreError: 24950:0:(namei.c:743:ll_create_noo&lt;br/&gt;
de()) ASSERTION(list_empty(&amp;amp;inode-&amp;gt;i_dentry)) failed&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel: LustreError: 24950:0:(namei.c:743:ll_create_noo&lt;br/&gt;
de()) LBUG&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel: Pid: 24950, comm: file_concat.sh&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel: Call Trace:&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff886625f1&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+00&lt;br/&gt;
x51/0x60 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff88662b2a&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x7a/0xd0  &lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8866d960&amp;gt;&amp;#93;&lt;/span&gt; cfs_tracefile_init+0x0/00&lt;br/&gt;
x10a &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff88aa00dc&amp;gt;&amp;#93;&lt;/span&gt; ll_create_nd+0x35c/0x9d00&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff80022924&amp;gt;&amp;#93;&lt;/span&gt; d_alloc+0x174/0x1a9&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8003a579&amp;gt;&amp;#93;&lt;/span&gt; vfs_create+0xe6/0x158&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8001b0d9&amp;gt;&amp;#93;&lt;/span&gt; open_namei+0x19d/0x6d5&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff80027533&amp;gt;&amp;#93;&lt;/span&gt; do_filp_open+0x1c/0x38&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff80019e5d&amp;gt;&amp;#93;&lt;/span&gt; do_sys_open+0x44/0xbe&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8005d116&amp;gt;&amp;#93;&lt;/span&gt; system_call+0x7e/0x83&lt;br/&gt;
Apr  6 19:35:07 client-21 kernel:&lt;/p&gt;</comment>
                            <comment id="16440" author="yong.fan" created="Thu, 16 Jun 2011 03:37:35 +0000"  >&lt;p&gt;For 2.6.38 or newer kernels, RCU is used to perform a significant part of the entire path walk. This is known as &quot;rcu-walk&quot; path walking. But even so, the parent&apos;s &quot;i_mutex&quot; is still necessary for the caller of &quot;do_lookup()&quot; to prevent concurrent lookup/create operations on the same name under the same parent.&lt;/p&gt;

&lt;p&gt;======&lt;/p&gt;
&lt;pre&gt;
        mutex_lock(&amp;amp;dir-&amp;gt;i_mutex);
        /*
         * First re-do the cached lookup just in case it was created
         * while we waited for the directory semaphore, or the first
         * lookup failed due to an unrelated rename.
         *
         * This could use version numbering or similar to avoid unnecessary
         * cache lookups, but then we&apos;d have to do the first lookup in the
         * non-racy way. However in the common case here, everything should
         * be hot in cache, so would it be a big win?
         */
        dentry = d_lookup(parent, name);
        if (likely(!dentry)) {
                dentry = d_alloc_and_lookup(parent, name, nd);
                mutex_unlock(&amp;amp;dir-&amp;gt;i_mutex);
                if (IS_ERR(dentry))
                        goto fail;
                goto done;
        }
        /*
         * Uhhuh! Nasty case: the cache was re-populated while
         * we waited on the semaphore. Need to revalidate.
         */
        mutex_unlock(&amp;amp;dir-&amp;gt;i_mutex);
&lt;/pre&gt;
&lt;p&gt;======&lt;/p&gt;


&lt;p&gt;So we still have to adjust statahead to make it controllable under concurrent access mode with other VFS create/unlink operations.&lt;/p&gt;</comment>
                            <comment id="17379" author="yong.fan" created="Thu, 7 Jul 2011 10:47:10 +0000"  >&lt;p&gt;The patch for removing statahead dcache hack:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#change,1208&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,1208&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="20984" author="yong.fan" created="Sat, 8 Oct 2011 04:12:34 +0000"  >&lt;p&gt;The race conditions caused by statahead will be fixed by the patch(es) for ORNL-7&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="10000" name="b15962_b18.patch" size="60461" author="yong.fan" created="Thu, 16 Sep 2010 01:14:55 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                    <customfield id="customfield_10020" key="com.atlassian.jira.plugin.system.customfieldtypes:float">
                        <customfieldname>Bugzilla ID</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15962.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10040" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic</customfieldname>
                        <customfieldvalues>
                                        <label>statahead</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzw1e7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>10307</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>