<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:23:53 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2279] replay-single test 88 LustreError: 10305:0:(osp_precreate.c:694:osp_precreate_reserve()) LBUG</title>
                <link>https://jira.whamcloud.com/browse/LU-2279</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Running replay-single, I hit this LBUG:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[50576.766529] Lustre: DEBUG MARKER: == replay-single test 88: MDS should not assign same objid to different files == 01:10:48 (1352095848)
[50576.964168] Turning device loop1 (0x700001) read-only
[50576.985608] Lustre: DEBUG MARKER: ost1 REPLAY BARRIER on lustre-OST0000
[50576.989899] Lustre: DEBUG MARKER: local REPLAY BARRIER on lustre-OST0000
[50577.290548] Turning device loop0 (0x700000) read-only
[50577.316807] Lustre: DEBUG MARKER: mds1 REPLAY BARRIER on lustre-MDT0000
[50577.323211] Lustre: DEBUG MARKER: local REPLAY BARRIER on lustre-MDT0000
[50577.541027] Lustre: lustre-OST0000-osc-MDT0000: slow creates, last=8104, next=8104, reserved=0, syn_changes=0, syn_rpc_in_progress=0, status=-19
[50577.541472] LustreError: 10305:0:(osp_precreate.c:694:osp_precreate_reserve()) LBUG
[50577.541985] Pid: 10305, comm: mdt00_000
[50577.542331] 
[50577.542332] Call Trace:
[50577.542866]  [&amp;lt;ffffffffa0455915&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
[50577.543256]  [&amp;lt;ffffffffa0455f27&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
[50577.543618]  [&amp;lt;ffffffffa0e4473c&amp;gt;] osp_precreate_reserve+0x50c/0x600 [osp]
[50577.544000]  [&amp;lt;ffffffffa0e3d7a0&amp;gt;] ? osp_object_alloc+0x120/0x290 [osp]
[50577.544368]  [&amp;lt;ffffffffa0e43533&amp;gt;] osp_declare_object_create+0x163/0x540 [osp]
[50577.544785]  [&amp;lt;ffffffffa07384bd&amp;gt;] lod_qos_declare_object_on+0xed/0x4c0 [lod]
[50577.545173]  [&amp;lt;ffffffffa073b65b&amp;gt;] lod_qos_prep_create+0x92b/0x1848 [lod]
[50577.545549]  [&amp;lt;ffffffffa073686b&amp;gt;] lod_declare_striped_object+0x14b/0x920 [lod]
[50577.546558]  [&amp;lt;ffffffffa0737348&amp;gt;] lod_declare_object_create+0x308/0x4f0 [lod]
[50577.546905]  [&amp;lt;ffffffffa0cf1c4f&amp;gt;] mdd_declare_object_create_internal+0xaf/0x1d0 [mdd]
[50577.547452]  [&amp;lt;ffffffffa0d02aca&amp;gt;] mdd_create+0x39a/0x1550 [mdd]
[50577.547770]  [&amp;lt;ffffffffa0de9d28&amp;gt;] mdt_reint_open+0x1078/0x1870 [mdt]
[50577.548094]  [&amp;lt;ffffffffa0d08b0e&amp;gt;] ? md_ucred+0x1e/0x60 [mdd]
[50577.548403]  [&amp;lt;ffffffffa0db5655&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
[50577.548718]  [&amp;lt;ffffffffa0dd4c81&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
[50577.549059]  [&amp;lt;ffffffffa0dce413&amp;gt;] mdt_reint_internal+0x4e3/0x7e0 [mdt]
[50577.549395]  [&amp;lt;ffffffffa0dce9dd&amp;gt;] mdt_intent_reint+0x1ed/0x500 [mdt]
[50577.549715]  [&amp;lt;ffffffffa0dca3ae&amp;gt;] mdt_intent_policy+0x38e/0x770 [mdt]
[50577.550077]  [&amp;lt;ffffffffa0783dda&amp;gt;] ldlm_lock_enqueue+0x2ea/0x890 [ptlrpc]
[50577.550424]  [&amp;lt;ffffffffa07ab187&amp;gt;] ldlm_handle_enqueue0+0x4e7/0x1010 [ptlrpc]
[50577.550761]  [&amp;lt;ffffffffa0dca8e6&amp;gt;] mdt_enqueue+0x46/0x130 [mdt]
[50577.551076]  [&amp;lt;ffffffffa0dbe1f2&amp;gt;] mdt_handle_common+0x932/0x1740 [mdt]
[50577.551425]  [&amp;lt;ffffffffa0dbf0d5&amp;gt;] mdt_regular_handle+0x15/0x20 [mdt]
[50577.551797]  [&amp;lt;ffffffffa07d9573&amp;gt;] ptlrpc_server_handle_request+0x463/0xe70 [ptlrpc]
[50577.552413]  [&amp;lt;ffffffffa045666e&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
[50577.552697]  [&amp;lt;ffffffffa07d2241&amp;gt;] ? ptlrpc_wait_event+0xb1/0x2a0 [ptlrpc]
[50577.552950]  [&amp;lt;ffffffff81051f73&amp;gt;] ? __wake_up+0x53/0x70
[50577.553303]  [&amp;lt;ffffffffa07dc10a&amp;gt;] ptlrpc_main+0xb9a/0x1960 [ptlrpc]
[50577.553709]  [&amp;lt;ffffffffa07db570&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
[50577.553927]  [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
[50577.554222]  [&amp;lt;ffffffffa07db570&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
[50577.554605]  [&amp;lt;ffffffffa07db570&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
[50577.554960]  [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
[50577.555289] 
[50577.560016] Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;crashdump and modules are at /exports/crashdumps/192.168.10.210-2012-11-05-01\:10\:50&lt;/p&gt;</description>
                <environment></environment>
        <key id="16592">LU-2279</key>
            <summary>replay-single test 88 LustreError: 10305:0:(osp_precreate.c:694:osp_precreate_reserve()) LBUG</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                            <label>MB</label>
                            <label>sequoia</label>
                    </labels>
                <created>Mon, 5 Nov 2012 15:24:24 +0000</created>
                <updated>Fri, 19 Apr 2013 20:48:02 +0000</updated>
                            <resolved>Tue, 8 Jan 2013 23:00:46 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="48032" author="prakash" created="Mon, 19 Nov 2012 19:09:38 +0000"  >&lt;p&gt;Hit this on our test MDS after updating to our &lt;tt&gt;2.3.56-1chaos&lt;/tt&gt; tag.&lt;/p&gt;</comment>
                            <comment id="48094" author="prakash" created="Tue, 20 Nov 2012 13:24:53 +0000"  >&lt;p&gt;Bump. Hit again on reboot.&lt;/p&gt;</comment>
                            <comment id="48204" author="bzzz" created="Wed, 21 Nov 2012 15:29:38 +0000"  >&lt;p&gt;Prakash, are you getting this on replay-single/88 or on a regular (not driven by test-framework) umount ?&lt;/p&gt;</comment>
                            <comment id="48209" author="prakash" created="Wed, 21 Nov 2012 17:05:26 +0000"  >&lt;p&gt;Not driven by test-framework. My cases were on our Grove (development) filesystem.&lt;/p&gt;</comment>
                            <comment id="48639" author="di.wang" created="Sat, 1 Dec 2012 03:45:15 +0000"  >&lt;p&gt;Alex, we also hit this in DNE tests, with 4MDS/8MDT. It seems the busy-loop check should not include ENODEV cases (i.e. the OST is not set up in time). Do you think this patch is reasonable?&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c
index 62b1ef5..48ee758 100644
--- a/lustre/osp/osp_precreate.c
+++ b/lustre/osp/osp_precreate.c
@@ -1066,19 +1066,17 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
                if (unlikely(rc == -ENODEV)) {
                        if (cfs_time_aftereq(cfs_time_current(), expire))
                                break;
-               }
 
+               } else if (unlikely(count++ == 1000)) {
 #if LUSTRE_VERSION_CODE &amp;lt; OBD_OCD_VERSION(2, 3, 90, 0)
-               /*
-                * to address Andreas&apos;s concern on possible busy-loop
-                * between this thread and osp_precreate_send()
-                */
-               if (unlikely(count++ == 1000)) {
+                       /*
+                        * to address Andreas&apos;s concern on possible busy-loop
+                        * between this thread and osp_precreate_send()
+                        */
                        osp_precreate_timeout_condition(d);
                        LBUG();
-               }
 #endif
-
+               }
                /*
                 * increase number of precreations
                 */

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="48640" author="bzzz" created="Sat, 1 Dec 2012 08:17:41 +0000"  >&lt;p&gt;well, the problem is not in ENODEV itself, but how often we&apos;re woken up. if we do not wait and no alive OST is found, then file creation fails.&lt;br/&gt;
the question is why (or who) wakes this thread up. can you investigate please ?&lt;/p&gt;</comment>
                            <comment id="48646" author="di.wang" created="Sat, 1 Dec 2012 19:01:33 +0000"  >&lt;p&gt;Hmm, if the precreate thread should not be woken up in this case, then probably osp_precreate_ready_condition should be fixed? (skip -ENODEV in the second if?)&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;static int osp_precreate_ready_condition(const struct lu_env *env,
                                         struct osp_device *d)
{
        /* ready if got enough precreated objects */
        /* we need to wait for others (opd_pre_reserved) and our object (+1) */
        if (d-&amp;gt;opd_pre_reserved + 1 &amp;lt; osp_objs_precreated(env, d))
                return 1;

        /* ready if OST reported no space and no destoys in progress */
        if (d-&amp;gt;opd_syn_changes + d-&amp;gt;opd_syn_rpc_in_progress == 0 &amp;amp;&amp;amp;
            d-&amp;gt;opd_pre_status != 0)
                return 1;

        return 0;
}

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt; 

&lt;p&gt;It seems it will always return 1 on the second if, because syn_changes and rpc_in_progress are zero at this moment, and pre_status == -ENODEV.&lt;/p&gt;</comment>
                            <comment id="48648" author="bzzz" created="Sun, 2 Dec 2012 00:13:39 +0000"  >&lt;p&gt;but we do not want to be blocked forever. this is why osp_precreate_reserve() exits when specified period expires. again the question why is it woken up so often?&lt;br/&gt;
given there is no connection, why status has changed that many times?&lt;/p&gt;</comment>
                            <comment id="48650" author="di.wang" created="Sun, 2 Dec 2012 18:42:04 +0000"  >&lt;p&gt;I am confused here, or I miss something probably. According to the log we saw here&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[50577.541027] Lustre: lustre-OST0000-osc-MDT0000: slow creates, last=8104, next=8104, reserved=0, syn_changes=0, syn_rpc_in_progress=0, status=-19
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;syn_changes and syn_rpc_in_progress are zero as expected, and status is -19 because the OST is not set up in time, so the loop in osp_precreate_reserve will&lt;br/&gt;
not sleep at all (why do you say it was woken up so often?), i.e. it will keep running until count reaches 1000 and hits LBUG, unless it reaches the expire time first, which might be unlikely given that the expire time is normally 100 seconds.&lt;/p&gt;

&lt;p&gt;Another thing I do not understand is why this OST is still being chosen to do precreate if it is not ready yet, probably another bug?&lt;/p&gt;

&lt;p&gt;Please correct me if I misunderstand something.&lt;/p&gt;



</comment>
                            <comment id="48670" author="bzzz" created="Mon, 3 Dec 2012 12:08:17 +0000"  >&lt;p&gt;yes, sorry for confusion.. i totally forgot my own code. there was a problem long ago when OSP was reconnecting too frequently leading to a storm of wakeups.&lt;/p&gt;

&lt;p&gt;as for OST being selected - it&apos;s valid as other OSTs might be in the same state (or just empty, for example).&lt;br/&gt;
there is also a race between disconnect and selection procedure.&lt;/p&gt;

&lt;p&gt;to understand whether this was a bad choice or not it&apos;s important to know the other OSTs&apos; states.&lt;/p&gt;</comment>
                            <comment id="49373" author="bzzz" created="Tue, 18 Dec 2012 07:28:42 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/4847&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4847&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="50182" author="bzzz" created="Tue, 8 Jan 2013 23:00:46 +0000"  >&lt;p&gt;the patch is landed&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvbpr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5462</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>