<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:47:52 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5023] sanity-lfsck test_12: OST lfsck_stop hung</title>
                <link>https://jira.whamcloud.com/browse/LU-5023</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Bob Glossman &amp;lt;bob.glossman@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;http://maloo.whamcloud.com/test_sets/4d306b04-d1ed-11e3-a9d1-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://maloo.whamcloud.com/test_sets/4d306b04-d1ed-11e3-a9d1-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;searching in maloo I see 8 similar failures reported starting 5/1/2014&lt;br/&gt;
suspect something happened then to start causing these failures&lt;br/&gt;
Don&apos;t think it&apos;s specific to the mod under test.&lt;/p&gt;


&lt;p&gt;The sub-test test_12 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: sanity-lfsck 12&lt;/p&gt;</description>
                <environment></environment>
        <key id="24610">LU-5023</key>
            <summary>sanity-lfsck test_12: OST lfsck_stop hung</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="yong.fan">nasf</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Wed, 7 May 2014 18:17:30 +0000</created>
                <updated>Tue, 3 Feb 2015 18:18:52 +0000</updated>
                            <resolved>Tue, 3 Feb 2015 18:18:52 +0000</resolved>
                                                    <fixVersion>Lustre 2.7.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="83539" author="green" created="Thu, 8 May 2014 17:18:53 +0000"  >&lt;p&gt;There appears to be some deadlock on ost:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;01:01:32:INFO: task ll_ost00_004:12219 blocked for more than 120 seconds.
01:01:32:      Not tainted 2.6.32-431.11.2.el6_lustre.g324fa81.x86_64 #1
01:01:32:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
01:01:32:ll_ost00_004  D 0000000000000000     0 12219      2 0x00000080
01:01:32: ffff880079891bb0 0000000000000046 0000000000000000 ffff88006c70facd
01:01:32: ffffffffa08c3b3d ffffffffa08c3b3b ffff880079891cc0 ffff88006c710000
01:01:32: ffff88007d7fe5f8 ffff880079891fd8 000000000000fbc8 ffff88007d7fe5f8
01:01:32:Call Trace:
01:01:32: [&amp;lt;ffffffff81529b0e&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
01:01:32: [&amp;lt;ffffffffa0864143&amp;gt;] ? null_alloc_rs+0xf3/0x390 [ptlrpc]
01:01:32: [&amp;lt;ffffffff815299ab&amp;gt;] mutex_lock+0x2b/0x50
01:01:32: [&amp;lt;ffffffffa0dd9cff&amp;gt;] lfsck_stop+0x12f/0x410 [lfsck]
01:01:32: [&amp;lt;ffffffffa0827505&amp;gt;] ? lustre_msg_buf+0x55/0x60 [ptlrpc]
01:01:32: [&amp;lt;ffffffffa084f086&amp;gt;] ? __req_capsule_get+0x166/0x6e0 [ptlrpc]
01:01:32: [&amp;lt;ffffffffa0dde214&amp;gt;] lfsck_in_notify+0x444/0x5f0 [lfsck]
01:01:32: [&amp;lt;ffffffffa0882852&amp;gt;] tgt_handle_lfsck_notify+0x62/0x150 [ptlrpc]
01:01:32: [&amp;lt;ffffffffa088a93c&amp;gt;] tgt_request_handle+0x23c/0xac0 [ptlrpc]
01:01:32: [&amp;lt;ffffffffa0839b3a&amp;gt;] ptlrpc_main+0xd1a/0x1980 [ptlrpc]
01:01:32: [&amp;lt;ffffffffa0838e20&amp;gt;] ? ptlrpc_main+0x0/0x1980 [ptlrpc]
01:01:32: [&amp;lt;ffffffff8109aee6&amp;gt;] kthread+0x96/0xa0
01:01:33: [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
01:01:33: [&amp;lt;ffffffff8109ae50&amp;gt;] ? kthread+0x0/0xa0
01:01:33: [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="83543" author="adilger" created="Thu, 8 May 2014 17:30:34 +0000"  >&lt;p&gt;LFSCK patches that have landed around that time:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;commit a8c9796fddaeda0572a64cedb471b2eead78c506
CommitDate: Tue Apr 29 17:22:17 2014 +0000
    LU-4941 lfsck: check LOV EA header properly
    Reviewed-on: http://review.whamcloud.com/10045
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;commit 63e7b15a1baa1aae03dfdc965e0777cf754ff29a
CommitDate: Wed Apr 30 03:41:14 2014 +0000
    LU-4895 lfsck: not create object to repair dangling by default    
    Reviewed-on: http://review.whamcloud.com/9989
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;commit 1dbba329174e6c7f7712f01fc4e44c44400fbc92
CommitDate: Wed Apr 30 03:42:00 2014 +0000
    LU-4556 tests: speed up sanity-lfsck and sanity-scrub tests
    Reviewed-on: http://review.whamcloud.com/9704
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Not sure if any of these are the cause, but likely candidates.  Jodi thought that this hadn&apos;t been hit since the initial group of failures, so it is possible that a later patch may have fixed it?&lt;/p&gt;</comment>
                            <comment id="83591" author="yong.fan" created="Fri, 9 May 2014 01:27:57 +0000"  >&lt;p&gt;We cannot say for certain that there is a deadlock on the OST. The real case is probably like this:&lt;/p&gt;

&lt;p&gt;1) The LFSCK thread on the OST is doing something but it is blocked.&lt;br/&gt;
2) At that time, the first lfsck_stop RPC from the MDT comes in, the RPC handler (on the OST) holds the lfsck::li_mutex to signal the LFSCK thread, then waits until the LFSCK thread has stopped.&lt;br/&gt;
3) Because the LFSCK thread is blocked on some unknown event, it cannot respond to the stop signal, so the first stop RPC is blocked there.&lt;br/&gt;
4) Then the subsequent lfsck_stop RPC handlers will be blocked at the lfsck::li_mutex by the first lfsck_stop RPC handler.&lt;/p&gt;

&lt;p&gt;So we need to find out what the LFSCK thread was doing at that time. Unfortunately, the current logs are not enough to determine that. I will make a debug patch to collect more logs.&lt;/p&gt;</comment>
                            <comment id="83594" author="yong.fan" created="Fri, 9 May 2014 01:36:40 +0000"  >&lt;p&gt;The debug patch for more logs:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/10276&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10276&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="83640" author="bogl" created="Fri, 9 May 2014 16:37:01 +0000"  >&lt;p&gt;more:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/2eada7a0-d4c7-11e3-808a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/2eada7a0-d4c7-11e3-808a-52540035b04c&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/465c7ec4-d33e-11e3-a102-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/465c7ec4-d33e-11e3-a102-52540035b04c&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/542c478e-d75b-11e3-9d16-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/542c478e-d75b-11e3-9d16-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="83819" author="yong.fan" created="Mon, 12 May 2014 10:29:49 +0000"  >&lt;p&gt;&amp;gt; &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/2eada7a0-d4c7-11e3-808a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/2eada7a0-d4c7-11e3-808a-52540035b04c&lt;/a&gt;&lt;br/&gt;
&amp;gt; &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/465c7ec4-d33e-11e3-a102-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/465c7ec4-d33e-11e3-a102-52540035b04c&lt;/a&gt;&lt;br/&gt;
&amp;gt; &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/542c478e-d75b-11e3-9d16-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/542c478e-d75b-11e3-9d16-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;In the above failure cases, test_11b failed because of improper FID seq checking and error handling. I have updated patch 10276 to resolve that.&lt;/p&gt;</comment>
                            <comment id="105531" author="gerrit" created="Tue, 3 Feb 2015 18:01:51 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/10276/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10276/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5023&quot; title=&quot;sanity-lfsck test_12: OST lfsck_stop hung&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5023&quot;&gt;&lt;del&gt;LU-5023&lt;/del&gt;&lt;/a&gt; tests: check FID seq properly for sanity-lfsck t_11b&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 89a53691076ba45ef415de511ed840a0e3cbdd2c&lt;/p&gt;</comment>
                            <comment id="105539" author="pjones" created="Tue, 3 Feb 2015 18:18:52 +0000"  >&lt;p&gt;Landed for 2.7&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="24685">LU-5049</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwm1z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>13903</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>