<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:48:35 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5107] MDS oops during mount with latest lustre 2.5.1 snapshot</title>
                <link>https://jira.whamcloud.com/browse/LU-5107</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;With the latest 2.5.1 snapshot when I attempt to bring up a file system I&apos;m seeing the following bug on the MDS during the MDT mount. Because of this I can&apos;t currently mount a 2.5 file system for testing.&lt;/p&gt;

&lt;p&gt;May 27 16:55:19 tick-dne-mds1 kernel: [  546.512335] LustreError: 13869:0:(osp_dev.c:864:osp_prepare_fid_client()) ASSERTION( osp-&amp;gt;opd_obd-&amp;gt;u.cli.cl_seq != ((void *)0) ) failed:&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.548335] LustreError: 13869:0:(osp_dev.c:864:osp_prepare_fid_client()) LBUG&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.569232] Pid: 13869, comm: ptlrpcd_rcv&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.579503]&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.579503] Call Trace:&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.598249]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05f3895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.618857]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05f3e97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.638835]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa108ee34&amp;gt;&amp;#93;&lt;/span&gt; osp_import_event+0x3d4/0x410 &lt;span class=&quot;error&quot;&gt;&amp;#91;osp&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.659079]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09207cc&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_activate_import+0x12c/0x270 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.688094]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0923502&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_connect_interpret+0x1912/0x2160 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.708910]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa08f894c&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_check_set+0x2bc/0x1b50 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 27 16:55:19 tick-dne-mds1 kernel: [  546.729238]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0924cab&amp;gt;&amp;#93;&lt;/span&gt; ptlrpcd_check+0x53b/0x560 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 27 16:55:20 tick-dne-mds1 kernel: [  546.749421]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09251cb&amp;gt;&amp;#93;&lt;/span&gt; ptlrpcd+0x20b/0x370 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 27 16:55:20 tick-dne-mds1 kernel: [  546.769286]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81061d00&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
May 27 16:55:20 tick-dne-mds1 kernel: [  546.789361]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0924fc0&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpcd+0x0/0x370 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
May 27 16:55:20 tick-dne-mds1 kernel: [  546.809414]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ab56&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
May 27 16:55:20 tick-dne-mds1 kernel: [  546.828800]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
May 27 16:55:20 tick-dne-mds1 kernel: [  546.848257]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aac0&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
May 27 16:55:20 tick-dne-mds1 kernel: [  546.859345]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;br/&gt;
May 27 16:55:20 tick-dne-mds1 kernel: [  546.879015]&lt;/p&gt;</description>
                <environment>MDS server</environment>
        <key id="24832">LU-5107</key>
            <summary>MDS oops during mount with latest lustre 2.5.1 snapshot</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="di.wang">Di Wang</assignee>
                                    <reporter username="simmonsja">James A Simmons</reporter>
                        <labels>
                    </labels>
                <created>Tue, 27 May 2014 21:02:53 +0000</created>
                <updated>Fri, 30 May 2014 19:48:56 +0000</updated>
                            <resolved>Fri, 30 May 2014 19:48:56 +0000</resolved>
                                    <version>Lustre 2.5.1</version>
                                    <fixVersion>Lustre 2.5.2</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="84983" author="jamesanunez" created="Tue, 27 May 2014 21:21:51 +0000"  >&lt;p&gt;Di, &lt;/p&gt;

&lt;p&gt;Would you please comment on this ticket?&lt;/p&gt;

&lt;p&gt;Thank you,&lt;br/&gt;
James&lt;/p&gt;</comment>
                            <comment id="84990" author="di.wang" created="Tue, 27 May 2014 22:48:54 +0000"  >&lt;p&gt;James,&lt;/p&gt;

&lt;p&gt;Did you set up Lustre with a single MDT or DNE? Are there any other console error messages? Could you tell me which build you are using? Is it a newly formatted FS? Do you have the dump log for this LBUG?&lt;/p&gt;

&lt;p&gt;Thank you.&lt;br/&gt;
WangDi&lt;/p&gt;</comment>
                            <comment id="85028" author="simmonsja" created="Wed, 28 May 2014 14:40:36 +0000"  >&lt;p&gt;I tried a build with a few extra patches. Then I tried the tip of b2_5 and it was the same problem. Yes it is a DNE setup with 3 MDS servers. When I encountered this error I was using an already formatted 2.5 file system. I later reformatted to make sure that was not the issue but the MDS oops was still there. I found that reverting &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4413&quot; title=&quot;Test failure on test suite conf-sanity, subtest test_56&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4413&quot;&gt;&lt;del&gt;LU-4413&lt;/del&gt;&lt;/a&gt; appears to make the problem go away.&lt;br/&gt;
I don&apos;t think reverting that patch is the solution. I have placed the dmesg log and vmcore dump at ftp.whamcloud.com/uploads/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5107&quot; title=&quot;MDS oops during mount with latest lustre 2.5.1 snapshot&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5107&quot;&gt;&lt;del&gt;LU-5107&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="85046" author="adilger" created="Wed, 28 May 2014 18:08:39 +0000"  >&lt;p&gt;James, there are two patches on &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4413&quot; title=&quot;Test failure on test suite conf-sanity, subtest test_56&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4413&quot;&gt;&lt;del&gt;LU-4413&lt;/del&gt;&lt;/a&gt;:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;&lt;a href=&quot;http://review.whamcloud.com/8997&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8997&lt;/a&gt; - osp: move seq allocation out of osp_import_event&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;http://review.whamcloud.com/8996&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8996&lt;/a&gt; - ptlrpc: don&apos;t try to recover no_recov connection&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Which one did you revert to fix the problem?&lt;/p&gt;</comment>
                            <comment id="85047" author="simmonsja" created="Wed, 28 May 2014 18:11:27 +0000"  >&lt;p&gt;I reverted patch 8997.&lt;/p&gt;</comment>
                            <comment id="85057" author="di.wang" created="Wed, 28 May 2014 18:37:40 +0000"  >&lt;p&gt;Hmm, there are some problems with 8997 when porting it to 2.5. Since we do not need OSP (for MDT) to allocate FIDs, osp_prepare_fid_client(d) needs to be moved after the if (d-&amp;gt;opd_connect_mdt) check in osp_import_event.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;diff --git a/lustre/osp/osp_dev.c b/lustre/osp/osp_dev.c
index a4a2f90..15f2ec0 100644
--- a/lustre/osp/osp_dev.c
+++ b/lustre/osp/osp_dev.c
@@ -1053,15 +1053,16 @@ static int osp_import_event(struct obd_device *obd, struct obd_import *imp,
        case IMP_EVENT_ACTIVE:
                d-&amp;gt;opd_imp_active = 1;
 
-               if (osp_prepare_fid_client(d) != 0)
-                       break;
-
                if (d-&amp;gt;opd_got_disconnected)
                        d-&amp;gt;opd_new_connection = 1;
                d-&amp;gt;opd_imp_connected = 1;
                d-&amp;gt;opd_imp_seen_connected = 1;
                if (d-&amp;gt;opd_connect_mdt)
                        break;
+
+               if (osp_prepare_fid_client(d) != 0)
+                       break;
+
                wake_up(&amp;amp;d-&amp;gt;opd_pre_waitq);
                __osp_sync_check_for_work(d);
                CDEBUG(D_HA, &quot;got connected\n&quot;);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt; 

&lt;p&gt;This should probably fix the problem; I will cook a patch.&lt;/p&gt;</comment>
                            <comment id="85072" author="di.wang" created="Wed, 28 May 2014 20:40:08 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/10476&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10476&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="85129" author="simmonsja" created="Thu, 29 May 2014 14:23:56 +0000"  >&lt;p&gt;The patch appears to have resolved the issue. Thank you.&lt;/p&gt;</comment>
                            <comment id="85184" author="adilger" created="Thu, 29 May 2014 22:25:24 +0000"  >&lt;p&gt;Problem was caused by backport of patch &lt;a href=&quot;http://review.whamcloud.com/9875&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9875&lt;/a&gt; to b2_5.&lt;/p&gt;</comment>
                            <comment id="85302" author="pjones" created="Fri, 30 May 2014 19:48:56 +0000"  >&lt;p&gt;Landed for 2.5.2. As I understand it, this issue only affected b2_5 so is not needed on other branches&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="22563">LU-4413</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwn53:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14091</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>