<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:49:47 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5244] conf-sanity test_32b: osp_sync_thread()) ASSERTION( count &lt; 10 ) </title>
                <link>https://jira.whamcloud.com/browse/LU-5244</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for wangdi &amp;lt;di.wang@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;http://maloo.whamcloud.com/test_sets/1c06a92c-fa14-11e3-883f-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://maloo.whamcloud.com/test_sets/1c06a92c-fa14-11e3-883f-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_32b failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;

&lt;p&gt;04:48:35:LustreError: 985:0:(osp_sync.c:994:osp_sync_thread()) ASSERTION( count &amp;lt; 10 ) failed: t32fs-OST0000-osc-MDT0000: 5 5 empty&lt;br/&gt;
04:48:35:LustreError: 985:0:(osp_sync.c:994:osp_sync_thread()) LBUG&lt;br/&gt;
04:48:35:Pid: 985, comm: osp-syn-0-0&lt;br/&gt;
04:48:35:&lt;br/&gt;
04:48:35:Call Trace:&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0742895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0742e97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa16ca112&amp;gt;&amp;#93;&lt;/span&gt; osp_sync_thread+0x6c2/0x7d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osp&amp;#93;&lt;/span&gt;&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81061d00&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa16c9a50&amp;gt;&amp;#93;&lt;/span&gt; ? osp_sync_thread+0x0/0x7d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osp&amp;#93;&lt;/span&gt;&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ab56&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aac0&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;br/&gt;
04:48:35:&lt;br/&gt;
04:48:35:Kernel panic - not syncing: LBUG&lt;br/&gt;
04:48:35:Pid: 985, comm: osp-syn-0-0 Tainted: G        W  ---------------    2.6.32-431.17.1.el6_lustre.g0eed638.x86_64 #1&lt;br/&gt;
04:48:35:Call Trace:&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8152795f&amp;gt;&amp;#93;&lt;/span&gt; ? panic+0xa7/0x16f&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0742eeb&amp;gt;&amp;#93;&lt;/span&gt; ? lbug_with_loc+0x9b/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa16ca112&amp;gt;&amp;#93;&lt;/span&gt; ? osp_sync_thread+0x6c2/0x7d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osp&amp;#93;&lt;/span&gt;&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81061d00&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa16c9a50&amp;gt;&amp;#93;&lt;/span&gt; ? osp_sync_thread+0x0/0x7d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osp&amp;#93;&lt;/span&gt;&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ab56&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x96/0xa0&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0xa/0x20&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aac0&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
04:48:35: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;br/&gt;
04:48:35:Initializing cgroup subsys cpuset&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: conf-sanity 32b&lt;/p&gt;</description>
                <environment></environment>
        <key id="25273">LU-5244</key>
            <summary>conf-sanity test_32b: osp_sync_thread()) ASSERTION( count &lt; 10 ) </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="utopiabound">Nathaniel Clark</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Mon, 23 Jun 2014 16:36:44 +0000</created>
                <updated>Mon, 30 Jun 2014 17:37:36 +0000</updated>
                            <resolved>Mon, 30 Jun 2014 17:37:36 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="87293" author="jlevi" created="Mon, 23 Jun 2014 17:36:03 +0000"  >&lt;p&gt;Nathaniel,&lt;br/&gt;
Could you have a look at this one?&lt;br/&gt;
Thank you!&lt;/p&gt;</comment>
                            <comment id="87294" author="adilger" created="Mon, 23 Jun 2014 17:39:26 +0000"  >&lt;p&gt;This is a bad LASSERT().  I can&apos;t see any reason why &quot;10&quot; is a magic number before which the remote RPCs need to be completed?  If we hit this on a test system, we will definitely hit this on some customer system when the MDS is busy, or the network is overloaded.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        &lt;span class=&quot;code-comment&quot;&gt;/* wait till all the requests are completed */&lt;/span&gt;
        count = 0;
        &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; (d-&amp;gt;opd_syn_rpc_in_progress &amp;gt; 0) {
                osp_sync_process_committed(&amp;amp;env, d);

                lwi = LWI_TIMEOUT(cfs_time_seconds(5), NULL, NULL);
                rc = l_wait_event(d-&amp;gt;opd_syn_waitq,
                                  d-&amp;gt;opd_syn_rpc_in_progress == 0,
                                  &amp;amp;lwi);
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (rc == -ETIMEDOUT)
                        count++;
                LASSERTF(count &amp;lt; 10, &lt;span class=&quot;code-quote&quot;&gt;&quot;%s: %d %d %sempty\n&quot;&lt;/span&gt;,
                         d-&amp;gt;opd_obd-&amp;gt;obd_name, d-&amp;gt;opd_syn_rpc_in_progress,
                         d-&amp;gt;opd_syn_rpc_in_flight,
                         list_empty(&amp;amp;d-&amp;gt;opd_syn_committed_there) ? &lt;span class=&quot;code-quote&quot;&gt;&quot;&quot;&lt;/span&gt; : &lt;span class=&quot;code-quote&quot;&gt;&quot;!&quot;&lt;/span&gt;);

        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;There needs to be proper error handling here, either just to continue looping, or to break out and return an error.&lt;/p&gt;

&lt;p&gt;This was landed as commit 08f093ce2c799faf7a580f53850ecb13d2b71603:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;    LU-2701 osp: wake up sync thread
    
    osp_sync_process_committed() to wake up sync thread when it
    is requested to stop (e.g. umount) and there is no pending
    work left. the patch adds a sanity check to ensure this
    process is not taking too long.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;&quot;sanity check&quot; != LASSERT()...&lt;/p&gt;</comment>
                            <comment id="87393" author="adilger" created="Tue, 24 Jun 2014 17:34:36 +0000"  >&lt;p&gt;I&apos;ve bumped this to be a blocker, since it is causing very regular test failures in review-dne-part-1.&lt;/p&gt;</comment>
                            <comment id="87402" author="bzzz" created="Tue, 24 Jun 2014 18:53:16 +0000"  >&lt;p&gt;The idea was that at umount we invalidate the import and this should cause RPCs in-flight to abort quickly. I&apos;m not very familiar with lnet internals and am not sure the abort happens promptly in all cases. I think it makes sense to see what&apos;s going on and why the RPCs weren&apos;t aborted in time.&lt;/p&gt;</comment>
                            <comment id="87424" author="utopiabound" created="Tue, 24 Jun 2014 21:47:10 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/10805&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10805&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="87427" author="adilger" created="Tue, 24 Jun 2014 22:21:36 +0000"  >&lt;p&gt;The patch avoids the crash, but so far there isn&apos;t any explanation about why this started failing so seriously.&lt;/p&gt;</comment>
                            <comment id="87512" author="adilger" created="Wed, 25 Jun 2014 17:51:45 +0000"  >&lt;p&gt;Unfortunately, both &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5244&quot; title=&quot;conf-sanity test_32b: osp_sync_thread()) ASSERTION( count &amp;lt; 10 ) &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5244&quot;&gt;&lt;del&gt;LU-5244&lt;/del&gt;&lt;/a&gt; and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5249&quot; title=&quot;conf-sanity test_32a: NULL pointer in fld_local_lookup&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5249&quot;&gt;&lt;del&gt;LU-5249&lt;/del&gt;&lt;/a&gt; are causing so many test failures that it may not be possible for them to land independently, so retriggering them may not be enough. Instead, basing one patch on the other would allow the second to pass, then it could be landed, then the first one rebased and landed. &lt;/p&gt;

&lt;p&gt;Also, reverting the patch that is the root of these problems may fix both issues at once. &lt;/p&gt;</comment>
                            <comment id="87814" author="jlevi" created="Mon, 30 Jun 2014 17:37:36 +0000"  >&lt;p&gt;Duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5188&quot; title=&quot;nbp6-OST002f-osc-MDT0000: invalid setattr record, lsr_valid:0&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5188&quot;&gt;&lt;del&gt;LU-5188&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="25287">LU-5249</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="25134">LU-5188</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwppj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14626</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>