<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:29:36 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2944] Client evictions - watchdog timeouts on MDT - iorfpp</title>
                <link>https://jira.whamcloud.com/browse/LU-2944</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Running the parallel-scale IOR fpp test. At the end of the test, 60 clients report ENOTCONN and are then evicted from the MDT due to a lock callback timeout:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Mar  8 15:47:22 hyperion-rst6 kernel: LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 477s: evicting client at 192.168.117.65@o2ib1  ns: mdt-ffff8802f7d21000 lock: ffff8801193bfe00/0xb4fc8ee670e8bb9a lrc: 3/0,0 mode: CR/CR res: 8589935754/45677 bits 0x9 rrc: 2 type: IBT flags: 0x200000000020 nid: 192.168.117.65@o2ib1 remote: 0xc3b8b1de83cb4606 expref: 81 pid: 11951 timeout: 4379843546 lvb_type: 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;After a long delay, the system is idle and the MDT is now watchdogging:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Mar  8 17:03:57 hyperion-rst6 kernel: LNet: Service thread pid 15804 was inactive &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; 304.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; debugging purposes:
Mar  8 17:03:57 hyperion-rst6 kernel: Pid: 15804, comm: mdt03_035
Mar  8 17:03:57 hyperion-rst6 kernel:
Mar  8 17:03:57 hyperion-rst6 kernel: Call Trace:  
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffff814ead12&amp;gt;] schedule_timeout+0x192/0x2e0
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffff8107cb50&amp;gt;] ? process_timeout+0x0/0x10
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa06df6d1&amp;gt;] cfs_waitq_timedwait+0x11/0x20 [libcfs]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa096b22d&amp;gt;] ldlm_completion_ast+0x4ed/0x960 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0966950&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x390 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffff8105fa40&amp;gt;] ? default_wake_function+0x0/0x20
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa096a968&amp;gt;] ldlm_cli_enqueue_local+0x1f8/0x5d0 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa096ad40&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f3ac60&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f3d92b&amp;gt;] mdt_object_lock0+0x33b/0xaf0 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f3ac60&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa096ad40&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f3e1a4&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f5e5a9&amp;gt;] mdt_reint_unlink+0x5b9/0xdf0 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f59781&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f52de3&amp;gt;] mdt_reint_internal+0x4e3/0x7d0 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f53114&amp;gt;] mdt_reint+0x44/0xe0 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f44008&amp;gt;] mdt_handle_common+0x628/0x1620 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f7c6e5&amp;gt;] mds_regular_handle+0x15/0x20 [mdt]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa09a404c&amp;gt;] ptlrpc_server_handle_request+0x41c/0xdf0 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa06df5de&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa099b799&amp;gt;] ? ptlrpc_wait_event+0xa9/0x290 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffff81052223&amp;gt;] ? __wake_up+0x53/0x70
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa09a5596&amp;gt;] ptlrpc_main+0xb76/0x1870 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa09a4a20&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa09a4a20&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffffa09a4a20&amp;gt;] ? ptlrpc_main+0x0/0x1870 [ptlrpc]
Mar  8 17:03:57 hyperion-rst6 kernel: [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;After a period, evicted clients are unable to reconnect; the MDT reports &apos;busy with 1 RPC&apos;.&lt;/p&gt;</description>
                <environment>Hyperion/LLNL RHEL6</environment>
        <key id="17823">LU-2944</key>
            <summary>Client evictions - watchdog timeouts on MDT - iorfpp</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="cliffw">Cliff White</reporter>
                        <labels>
                            <label>MB</label>
                    </labels>
                <created>Mon, 11 Mar 2013 12:15:32 +0000</created>
                <updated>Tue, 4 Feb 2014 14:50:57 +0000</updated>
                            <resolved>Wed, 13 Mar 2013 17:15:16 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="53719" author="green" created="Mon, 11 Mar 2013 14:53:53 +0000"  >&lt;p&gt;We really need more info than that, like sysrq-t or the like.&lt;br/&gt;
A bigger log would let us see all such hung threads...&lt;/p&gt;

&lt;p&gt;Does this happen every time?&lt;/p&gt;</comment>
                            <comment id="53948" author="cliffw" created="Wed, 13 Mar 2013 16:58:16 +0000"  >&lt;p&gt;No, I have repeated the parallel-scale tests and did not see evictions on the second run. You can close this; I can re-open it if I get a repeat.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="22968">LU-4572</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="16834">LU-2419</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvklj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7066</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>