<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:54:46 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5816] Silence misleading kernel message&quot;task tgt_recov:XXX blocked for more than 120 seconds&quot;</title>
                <link>https://jira.whamcloud.com/browse/LU-5816</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When recovery takes a prolonged time due to waiting for all of the clients to reconnect, the recovery task triggers the hangcheck timer.&lt;/p&gt;

&lt;p&gt;There is likely a way to tell the kernel we are fine or otherwise have some sort of a ping happen to avoid this message that unnecessarily scares users.&lt;/p&gt;</description>
                <environment></environment>
        <key id="27340">LU-5816</key>
            <summary>Silence misleading kernel message&quot;task tgt_recov:XXX blocked for more than 120 seconds&quot;</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="hongchao.zhang">Hongchao Zhang</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Tue, 28 Oct 2014 04:32:12 +0000</created>
                <updated>Tue, 4 Aug 2015 17:11:37 +0000</updated>
                            <resolved>Tue, 3 Feb 2015 18:17:24 +0000</resolved>
                                    <version>Lustre 2.5.4</version>
                                    <fixVersion>Lustre 2.7.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>12</watches>
                                                                            <comments>
                            <comment id="97719" author="adilger" created="Tue, 28 Oct 2014 17:30:28 +0000"  >&lt;p&gt;Oleg, can you please paste the stack into the bug, so we can see where it is blocked?&lt;/p&gt;</comment>
                            <comment id="97721" author="jlevi" created="Tue, 28 Oct 2014 17:32:45 +0000"  >&lt;p&gt;Bob,&lt;br/&gt;
Could you have a look at this one and comment, please?&lt;br/&gt;
Thank you!&lt;/p&gt;</comment>
                            <comment id="97729" author="bogl" created="Tue, 28 Oct 2014 18:16:33 +0000"  >&lt;p&gt;not a lot of info to work with here yet.  the &quot;blocked for more than&quot; message comes from check_hung_task() in the kernel when some task is in an uninterruptible sleep for too long.  need some clue as to where lustre is sleeping.  don&apos;t know if there is any kernel internal API to delay or suppress these warnings.&lt;/p&gt;

&lt;p&gt;Oh, I see Andreas already requested a stack trace on the hang.  I agree that would be very useful info.&lt;/p&gt;</comment>
                            <comment id="98025" author="ezell" created="Fri, 31 Oct 2014 03:32:04 +0000"  >&lt;p&gt;Messages during recovery like&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[  844.502040] INFO: task tgt_recov:19764 blocked &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; more than 120 seconds.
[  844.509756]       Not tainted 2.6.32-431.17.1.el6.wc.x86_64 #1
[  844.516404] &lt;span class=&quot;code-quote&quot;&gt;&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot;&lt;/span&gt; disables &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; message.
[  844.525393] tgt_recov     D 0000000000000003     0 19764      2 0x00000000
[  844.533247]  ffff881fcaed1da0 0000000000000046 0000000000000000 ffff881fcaed1d64
[  844.541824]  0000009100000000 ffff88207fc28800 ffff88011c456880 0000000000000400
[  844.550425]  ffff881fcaecb058 ffff881fcaed1fd8 000000000000fbc8 ffff881fcaecb058
[  844.559077] Call Trace:
[  844.561987]  [&amp;lt;ffffffffa07ca620&amp;gt;] ? check_for_clients+0x0/0x70 [ptlrpc]
[  844.569538]  [&amp;lt;ffffffffa07cbc8d&amp;gt;] target_recovery_overseer+0x9d/0x230 [ptlrpc]
[  844.577874]  [&amp;lt;ffffffffa07ca310&amp;gt;] ? exp_connect_healthy+0x0/0x20 [ptlrpc]
[  844.587551]  [&amp;lt;ffffffff8109af00&amp;gt;] ? autoremove_wake_function+0x0/0x40
[  844.594898]  [&amp;lt;ffffffffa07d2550&amp;gt;] ? target_recovery_thread+0x0/0x1920 [ptlrpc]
[  844.603226]  [&amp;lt;ffffffffa07d2a90&amp;gt;] target_recovery_thread+0x540/0x1920 [ptlrpc]
[  844.611534]  [&amp;lt;ffffffff81061d12&amp;gt;] ? default_wake_function+0x12/0x20
[  844.618690]  [&amp;lt;ffffffffa07d2550&amp;gt;] ? target_recovery_thread+0x0/0x1920 [ptlrpc]
[  844.627004]  [&amp;lt;ffffffff8109ab56&amp;gt;] kthread+0x96/0xa0
[  844.632593]  [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
[  844.638274]  [&amp;lt;ffffffff8109aac0&amp;gt;] ? kthread+0x0/0xa0
[  844.643947]  [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;and&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[ 1805.700512] INFO: task tgt_recov:19764 blocked &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; more than 120 seconds.
[ 1805.708227]       Not tainted 2.6.32-431.17.1.el6.wc.x86_64 #1
[ 1805.714870] &lt;span class=&quot;code-quote&quot;&gt;&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot;&lt;/span&gt; disables &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; message.
[ 1805.723897] tgt_recov     D 000000000000000b     0 19764      2 0x00000000
[ 1805.731776]  ffff881fcaed1da0 0000000000000046 0000000000000000 ffff883f9ac0ba21
[ 1805.740357]  ffffc900802298f8 ffff88402a8af430 0000000000004ed2 0000000000004ece
[ 1805.748926]  ffff881fcaecb058 ffff881fcaed1fd8 000000000000fbc8 ffff881fcaecb058
[ 1805.757503] Call Trace:
[ 1805.760426]  [&amp;lt;ffffffffa07cea60&amp;gt;] ? check_for_next_transno+0x0/0x590 [ptlrpc]
[ 1805.768553]  [&amp;lt;ffffffffa07cbc8d&amp;gt;] target_recovery_overseer+0x9d/0x230 [ptlrpc]
[ 1805.776888]  [&amp;lt;ffffffffa07ca330&amp;gt;] ? exp_req_replay_healthy+0x0/0x30 [ptlrpc]
[ 1805.784921]  [&amp;lt;ffffffff8109af00&amp;gt;] ? autoremove_wake_function+0x0/0x40
[ 1805.792266]  [&amp;lt;ffffffffa07d2cba&amp;gt;] target_recovery_thread+0x76a/0x1920 [ptlrpc]
[ 1805.800565]  [&amp;lt;ffffffff81061d12&amp;gt;] ? default_wake_function+0x12/0x20
[ 1805.807732]  [&amp;lt;ffffffffa07d2550&amp;gt;] ? target_recovery_thread+0x0/0x1920 [ptlrpc]
[ 1805.816039]  [&amp;lt;ffffffff8109ab56&amp;gt;] kthread+0x96/0xa0
[ 1805.821621]  [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
[ 1805.827287]  [&amp;lt;ffffffff8109aac0&amp;gt;] ? kthread+0x0/0xa0
[ 1805.832961]  [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;target_recovery_overseer() calls wait_event(), but it may take more than 120 seconds for the event to happen.  The solution might be to use wait_event_timeout() in a loop to wake back up after 90% of sysctl_hung_task_timeout_secs has elapsed.&lt;/p&gt;</comment>
                            <comment id="98886" author="hongchao.zhang" created="Tue, 11 Nov 2014 15:00:17 +0000"  >&lt;p&gt;the patch is tracked at &lt;a href=&quot;http://review.whamcloud.com/#/c/12672/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/12672/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="101682" author="yujian" created="Tue, 16 Dec 2014 03:31:28 +0000"  >&lt;p&gt;While testing patch &lt;a href=&quot;http://review.whamcloud.com/13046&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13046&lt;/a&gt; on Lustre b2_5 branch, conf-sanity test 47 hung.&lt;/p&gt;

&lt;p&gt;On OSS:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;06:13:45:INFO: task tgt_recov:3507 blocked for more than 120 seconds.
06:13:45:      Not tainted 2.6.32-431.29.2.el6_lustre.gbb46ddc.x86_64 #1
06:13:45:&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
06:13:45:tgt_recov     D 0000000000000000     0  3507      2 0x00000080
06:13:45: ffff880066fdbda0 0000000000000046 0000000000000000 ffff880074f078c4
06:13:45: 00000053ffffff0a ffffffffa07f79a6 0000000000000064 0008000000010000
06:13:45: ffff880071556638 ffff880066fdbfd8 000000000000fbc8 ffff880071556638
06:13:45:Call Trace:
06:13:45: [&amp;lt;ffffffff8109b2ce&amp;gt;] ? prepare_to_wait+0x4e/0x80
06:13:45: [&amp;lt;ffffffffa073b910&amp;gt;] ? check_for_next_lock+0x0/0x270 [ptlrpc]
06:13:45: [&amp;lt;ffffffffa073cc9d&amp;gt;] target_recovery_overseer+0x9d/0x230 [ptlrpc]
06:13:45: [&amp;lt;ffffffffa073b370&amp;gt;] ? exp_lock_replay_healthy+0x0/0x30 [ptlrpc]
06:13:45: [&amp;lt;ffffffff8109afa0&amp;gt;] ? autoremove_wake_function+0x0/0x40
06:13:45: [&amp;lt;ffffffffa074121e&amp;gt;] target_recovery_thread+0x96e/0x1960 [ptlrpc]
06:13:45: [&amp;lt;ffffffff81061d12&amp;gt;] ? default_wake_function+0x12/0x20
06:13:45: [&amp;lt;ffffffffa07408b0&amp;gt;] ? target_recovery_thread+0x0/0x1960 [ptlrpc]
06:13:45: [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
06:13:45: [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
06:13:45: [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
06:13:45: [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo report: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/97a07574-8473-11e4-8915-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/97a07574-8473-11e4-8915-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="105528" author="gerrit" created="Tue, 3 Feb 2015 17:54:02 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/12672/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/12672/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5816&quot; title=&quot;Silence misleading kernel message&amp;quot;task tgt_recov:XXX blocked for more than 120 seconds&amp;quot;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5816&quot;&gt;&lt;del&gt;LU-5816&lt;/del&gt;&lt;/a&gt; target: don&apos;t trigger watchdog waiting in recovery&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: ba0a1b36870807e8182189bcb08f7b105aff6c57&lt;/p&gt;</comment>
                            <comment id="105537" author="pjones" created="Tue, 3 Feb 2015 18:17:24 +0000"  >&lt;p&gt;Landed for 2.7&lt;/p&gt;</comment>
                            <comment id="105636" author="gerrit" created="Wed, 4 Feb 2015 02:21:37 +0000"  >&lt;p&gt;Jian Yu (jian.yu@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13623&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13623&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5816&quot; title=&quot;Silence misleading kernel message&amp;quot;task tgt_recov:XXX blocked for more than 120 seconds&amp;quot;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5816&quot;&gt;&lt;del&gt;LU-5816&lt;/del&gt;&lt;/a&gt; target: don&apos;t trigger watchdog waiting in recovery&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_5&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: c1d6ebbaaf7d598622fd7a6319c97bb29ce68805&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="27313">LU-5805</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwzmn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>16310</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>