<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:29:40 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16747] concurrency issue in get_root_path_slow()</title>
                <link>https://jira.whamcloud.com/browse/LU-16747</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Since the integration of &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/36603&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/36603&lt;/a&gt; (&quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8585&quot; title=&quot;All Lustre test suites should pass with subdirectory mount&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8585&quot;&gt;LU-8585&lt;/a&gt; llapi: use open_by_handle_at in llapi_open_by_fid&quot;) at CEA, we observed the following errors when starting a robinhood policy process:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;rh_purge.log:2023/04/11 11:43:33 [24565/6] lhsm | ERROR performing HSM
request(RELEASE, root=/mnt/lustre, fid=[0x200000bd1:0x599:0x0]): Bad
file descriptor
rh_purge.log:2023/04/11 11:43:33 [24565/6] purge | Error applying action
on entry /mnt/lustre/file.5: Bad file descriptor
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The error is triggered inside &quot;llapi_hsm_request()&quot;.&lt;/p&gt;

&lt;p&gt;This is because several threads are started at the same time with an uninitialized root_cache entry. Multiple threads are running inside get_root_path_slow() at the same moment:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;Thread 1 takes the write lock on root_cache and updates root_cache.fd.&lt;/li&gt;
	&lt;li&gt;Thread 2 takes the write lock on root_cache, closes root_cache.fd and then updates it.&lt;/li&gt;
	&lt;li&gt;Thread 1 returns the file descriptor that Thread 2 has just closed to llapi_hsm_request().&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;Here is a reproducer:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-c&quot;&gt;
&lt;span class=&quot;code-macro&quot;&gt;#define TEST13_THR_NBR 20                                                                       
&lt;/span&gt;&lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;void&lt;/span&gt;&lt;/span&gt; *test13_thr(&lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;void&lt;/span&gt;&lt;/span&gt; *arg)                                                                     
{                                                                                               
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt;&lt;/span&gt; *fidstr = arg;                                                                     
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt;&lt;/span&gt; path[PATH_MAX];                                                                    
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; recno = -1;                                                                   
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt;&lt;/span&gt; linkno = 0;                                                                         
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; rc;                                                                           
                                                                                                
        rc = llapi_fid2path(lustre_dir2, fidstr, path,                                          
                            &lt;span class=&quot;code-keyword&quot;&gt;sizeof&lt;/span&gt;(path), &amp;amp;recno, &amp;amp;linkno);                                     
                                                                                                
        &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; (&lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;void&lt;/span&gt;&lt;/span&gt; *) rc;                                                                     
}                                                                                               
                                                                                                
&lt;span class=&quot;code-comment&quot;&gt;/* Test llapi root cache on multi-threading context */&lt;/span&gt;                                          
&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;void&lt;/span&gt;&lt;/span&gt; test13(&lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;void&lt;/span&gt;&lt;/span&gt;)                                                                        
{                                                                                               
        &lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; pthread_t thread[TEST13_THR_NBR];                                                
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt;&lt;/span&gt; fd, i, iter;                                                                        
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; rc;                                                                           
        &lt;span class=&quot;code-keyword&quot;&gt;struct&lt;/span&gt; lu_fid fid;                                                                      
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt;&lt;/span&gt; fidstr[FID_LEN + 1];                                                               
                                                                                                
        fd = creat(mainpath, 00660);                                                            
        ASSERTF(fd &amp;gt;= 0, &lt;span class=&quot;code-quote-red&quot;&gt;&quot;creat failed &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; &lt;span class=&quot;code-quote-red&quot;&gt;&apos;%s&apos;&lt;/span&gt;: %s&quot;&lt;/span&gt;,                                           
                mainpath, strerror(errno));                                                     
                                                                                                
        rc = llapi_fd2fid(fd, &amp;amp;fid);                                                            
        ASSERTF(rc == 0, &lt;span class=&quot;code-quote-red&quot;&gt;&quot;llapi_fd2fid failed &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; &lt;span class=&quot;code-quote-red&quot;&gt;&apos;%s&apos;&lt;/span&gt;: %s&quot;&lt;/span&gt;,                                    
                mainpath, strerror(-rc));                                                       
        close(fd);                                                                              
                                                                                                
        snprintf(fidstr, &lt;span class=&quot;code-keyword&quot;&gt;sizeof&lt;/span&gt;(fidstr), DFID_NOBRACE, PFID(&amp;amp;fid));                             
        &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; (iter = 0; iter &amp;lt; 100; iter++) {                                                    
                &lt;span class=&quot;code-comment&quot;&gt;/* reset cache on first mountpoint */&lt;/span&gt;                                           
                fd = llapi_open_by_fid(lustre_dir, &amp;amp;fid, O_RDONLY);                             
                ASSERTF(fd &amp;gt;= 0, &lt;span class=&quot;code-quote-red&quot;&gt;&quot;llapi_open_by_fid &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; &quot;&lt;/span&gt; DFID_NOBRACE &lt;span class=&quot;code-quote-red&quot;&gt;&quot;: %d&quot;&lt;/span&gt;,                  
                        PFID(&amp;amp;fid), fd);                                                        
                close(fd);                                                                      
                                                                                                
                &lt;span class=&quot;code-comment&quot;&gt;/* start threads with llapi_fid2path() */&lt;/span&gt;                                    
                &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; (i = 0; i &amp;lt; TEST13_THR_NBR; i++)                                            
                        pthread_create(&amp;amp;thread[i], &lt;span class=&quot;code-keyword&quot;&gt;NULL&lt;/span&gt;, &amp;amp;test13_thr, fidstr);                  
                                                                                                
                &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; (i = 0; i &amp;lt; TEST13_THR_NBR; i++) {                                          
                        pthread_join(thread[i], (&lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;void&lt;/span&gt;&lt;/span&gt; **) &amp;amp;rc);                                 
                        ASSERTF(rc == 0,                                                        
                                &lt;span class=&quot;code-quote-red&quot;&gt;&quot;llapi_fid2path &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; &quot;&lt;/span&gt; DFID_NOBRACE &lt;span class=&quot;code-quote-red&quot;&gt;&quot; (iter: %d, thr:%d): %s&quot;&lt;/span&gt;,   
                                PFID(&amp;amp;fid), iter, i, strerror(-rc));                            
                }                                                                               
        }                                                                                       
}                                                                                               
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="75613">LU-16747</key>
            <summary>concurrency issue in get_root_path_slow()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="eaujames">Etienne Aujames</assignee>
                                    <reporter username="eaujames">Etienne Aujames</reporter>
                        <labels>
                    </labels>
                <created>Mon, 17 Apr 2023 15:10:52 +0000</created>
                <updated>Sun, 16 Jul 2023 23:07:05 +0000</updated>
                            <resolved>Tue, 20 Jun 2023 17:22:09 +0000</resolved>
                                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="369821" author="gerrit" created="Tue, 18 Apr 2023 19:21:31 +0000"  >&lt;p&gt;&quot;Etienne AUJAMES &amp;lt;eaujames@ddn.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/50682&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/50682&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16747&quot; title=&quot;concurrency issue in get_root_path_slow()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16747&quot;&gt;&lt;del&gt;LU-16747&lt;/del&gt;&lt;/a&gt; llapi: fix race in get_root_path_slow()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 2ee9f3eb401b37e4a5c8f74bcf2e07ecbc38966d&lt;/p&gt;</comment>
                            <comment id="375844" author="gerrit" created="Mon, 19 Jun 2023 13:52:20 +0000"  >&lt;p&gt;&quot;Etienne AUJAMES &amp;lt;eaujames@ddn.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/51367&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/51367&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16747&quot; title=&quot;concurrency issue in get_root_path_slow()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16747&quot;&gt;&lt;del&gt;LU-16747&lt;/del&gt;&lt;/a&gt; llapi: fix race in get_root_path_slow()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_15&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e364909d72a9ae48699e171b1ee10170b9a8fbce&lt;/p&gt;</comment>
                            <comment id="375902" author="gerrit" created="Tue, 20 Jun 2023 03:40:43 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/50682/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/50682/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16747&quot; title=&quot;concurrency issue in get_root_path_slow()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16747&quot;&gt;&lt;del&gt;LU-16747&lt;/del&gt;&lt;/a&gt; llapi: fix race in get_root_path_slow()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 9ef1e097d53000233f9ba23319268f467c276173&lt;/p&gt;</comment>
                            <comment id="375993" author="pjones" created="Tue, 20 Jun 2023 17:22:09 +0000"  >&lt;p&gt;Landed for 2.16&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i03j5z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>