<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:46:28 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-4858] Race when reading ofd proc entries while unmounting OST</title>
                <link>https://jira.whamcloud.com/browse/LU-4858</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This bug was discovered due to the fact we run a utility that monitors the OST stats from /proc/fs/lustre/obdfilter/fsname-OSTXXXX. These stats were still being accessed while the test file system was taken down.&lt;/p&gt;

&lt;p&gt;&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.199509&amp;#93;&lt;/span&gt; Lustre: server umount a1_thin-OST0129 complete&lt;br/&gt;
&amp;lt;0&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.339106&amp;#93;&lt;/span&gt; LustreError: 9005:0:(dt_object.h:1257:dt_statfs()) ASSERTION( dev ) failed: &lt;br/&gt;
&amp;lt;0&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.357847&amp;#93;&lt;/span&gt; LustreError: 9005:0:(dt_object.h:1257:dt_statfs()) LBUG&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.377690&amp;#93;&lt;/span&gt; Pid: 9005, comm: cerebrod&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.387927&amp;#93;&lt;/span&gt; &lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.387928&amp;#93;&lt;/span&gt; Call Trace:&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.406853&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03d4895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.427104&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03d4e97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.447295&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d8b3be&amp;gt;&amp;#93;&lt;/span&gt; ofd_statfs_internal+0x23e/0x2a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ofd&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.467649&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d8b487&amp;gt;&amp;#93;&lt;/span&gt; ofd_statfs+0x67/0x510 &lt;span class=&quot;error&quot;&gt;&amp;#91;ofd&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.487335&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05496d0&amp;gt;&amp;#93;&lt;/span&gt; lprocfs_rd_filesfree+0x170/0x4c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.508376&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811e9823&amp;gt;&amp;#93;&lt;/span&gt; ? proc_reg_open+0xc3/0x160&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.528056&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811e9760&amp;gt;&amp;#93;&lt;/span&gt; ? proc_reg_open+0x0/0x160&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.547148&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0546563&amp;gt;&amp;#93;&lt;/span&gt; lprocfs_fops_read+0xf3/0x1f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.567489&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811e9d9e&amp;gt;&amp;#93;&lt;/span&gt; proc_reg_read+0x7e/0xc0&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.587040&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81181cb5&amp;gt;&amp;#93;&lt;/span&gt; vfs_read+0xb5/0x1a0&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.597941&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81181df1&amp;gt;&amp;#93;&lt;/span&gt; sys_read+0x51/0x90&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.617517&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100b072&amp;gt;&amp;#93;&lt;/span&gt; system_call_fastpath+0x16/0x1b&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.637694&amp;#93;&lt;/span&gt; &lt;br/&gt;
&amp;lt;0&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;85795.647408&amp;#93;&lt;/span&gt; Kernel panic - not syncing: LBUG&lt;/p&gt;

&lt;p&gt;We have core dumps as well.&lt;/p&gt;</description>
                <environment>Affects any OSS with any environment.</environment>
        <key id="24047">LU-4858</key>
            <summary>Race when reading ofd proc entries while unmounting OST</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="simmonsja">James A Simmons</reporter>
                        <labels>
                            <label>mn4</label>
                    </labels>
                <created>Thu, 3 Apr 2014 14:12:28 +0000</created>
                <updated>Wed, 28 May 2014 07:12:45 +0000</updated>
                            <resolved>Thu, 8 May 2014 14:48:40 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                    <version>Lustre 2.5.1</version>
                    <version>Lustre 2.4.3</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                    <fixVersion>Lustre 2.5.2</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="80940" author="jamesanunez" created="Thu, 3 Apr 2014 14:43:36 +0000"  >&lt;p&gt;James, &lt;/p&gt;

&lt;p&gt;Are you planning on investigating this one and following this up with a patch?&lt;/p&gt;

&lt;p&gt;Thanks, &lt;br/&gt;
James&lt;/p&gt;</comment>
                            <comment id="81032" author="bfaccini" created="Fri, 4 Apr 2014 08:40:30 +0000"  >&lt;p&gt;James, But why don&apos;t you stop the utility before you take the filesystem down ??!! &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/wink.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;

&lt;p&gt;More seriously, at 1st look it seems not very logical that in ofd_&lt;span class=&quot;error&quot;&gt;&amp;#91;init0,fini&amp;#93;&lt;/span&gt;() routines, the procfs stuff is initialized/stopped (via ofd_procfs_init()/ofd_procfs_fini() routines) before/after other players (like the &quot;stack&quot; and its ofd_stack_init()/ofd_stack_fini() routines) it depends on. Maybe we can just change this order in the code? If not easy/possible, then some protection will have to be added to prevent the race.&lt;/p&gt;</comment>
                            <comment id="81037" author="pjones" created="Fri, 4 Apr 2014 12:00:17 +0000"  >&lt;p&gt;Lai&lt;/p&gt;

&lt;p&gt;Could you please assist with this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="81066" author="simmonsja" created="Fri, 4 Apr 2014 17:53:39 +0000"  >&lt;p&gt;Matt Ezell just came up with a patch &lt;a href=&quot;http://review.whamcloud.com/#/c/9885&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9885&lt;/a&gt; to expose the problem. No patch from me yet. Next week I will be at LUG so it might be a while before I have a patch to test.&lt;/p&gt;</comment>
                            <comment id="81133" author="ezell" created="Mon, 7 Apr 2014 18:22:56 +0000"  >&lt;p&gt;Bruno - the tool is LMT, which is always running in the background.  In this specific case, we were only unmounting some of the OSTs from the OSS.  So we wanted it to remain running.&lt;/p&gt;

&lt;p&gt;I had a good reproducer in my test environment based on 2.4, but I couldn&apos;t get it to fail in Maloo.  It turns out that in master, some of the OBD methods (including statfs) were removed from OFD.  So accessing /proc/fs/lustre/obdfilter/fsname-OSTXXXX/filesfree (for example) now returns EOPNOTSUPP.  So for master we probably want to symlink the following entries to their corresponding ../../osd-*/fsname-OSTXXXX/&amp;lt;name&amp;gt; entry, similar to how we have brw_stats, read_cache_enable, readcache_max_filesize, and writethrough_cache_enable.  If this doesn&apos;t happen, tools like LMT may break.  The ones I found that don&apos;t work are:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;blocksize
filesfree
filestotal
kbytesavail
kbytesfree
kbytestotal
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;After that change, I&apos;m not sure if any of the remaining code paths can hit this race or not.  Certainly my old reproducer doesn&apos;t work anymore.  And having a client do something like &quot;df $MOUNT&quot; just hangs while the unmount is happening.&lt;/p&gt;

&lt;p&gt;I haven&apos;t tried Bruno&apos;s suggestion of just reordering the OST tear-down procedure, but I would expect it to be safe.&lt;/p&gt;

&lt;p&gt;This also brings up another question of when to use a LASSERT versus handling the unexpected situation gracefully.  That is, when should&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;LASSERT( dev )&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;be written as&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; ( dev == NULL )
    &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; -ENODEV;&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="82387" author="laisiyao" created="Thu, 24 Apr 2014 14:17:56 +0000"  >&lt;p&gt;patches are ready:&lt;br/&gt;
master: &lt;a href=&quot;http://review.whamcloud.com/#/c/10082/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10082/&lt;/a&gt;&lt;br/&gt;
2.4: &lt;a href=&quot;http://review.whamcloud.com/#/c/10083/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10083/&lt;/a&gt;&lt;br/&gt;
2.5: &lt;a href=&quot;http://review.whamcloud.com/#/c/10084/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10084/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Matt, could you help verify it works in your system? BTW I think you can revive &lt;a href=&quot;http://review.whamcloud.com/#/c/9885&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9885&lt;/a&gt; since my patch will fix the issue you mentioned above.&lt;/p&gt;</comment>
                            <comment id="83509" author="pjones" created="Thu, 8 May 2014 14:48:40 +0000"  >&lt;p&gt;Landed for 2.6. Will consider for b2_5 and b2_4 also.&lt;/p&gt;</comment>
                            <comment id="85008" author="adilger" created="Wed, 28 May 2014 07:12:45 +0000"  >&lt;p&gt;Patch landed to b2_5 for 2.5.2.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwj6n:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>13397</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>