<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:51:18 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5415] High ldlm_poold load on client</title>
                <link>https://jira.whamcloud.com/browse/LU-5415</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When LRU resizing is enabled on the client, ldlm_poold sometimes has extremely high CPU load. In the meantime, schedule_timeout() complains about a negative timeout. After some time, the problem recovers without any manual intervention, but it happens really frequently when the file system is under high load.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;top - 09:48:51 up 6 days, 11:17,  2 users,  load average: 1.00, 1.01, 1.00
Tasks: 516 total,   2 running, 514 sleeping,   0 stopped,   0 zombie
Cpu(s):  0.1%us,  6.4%sy,  0.0%ni, 93.4%id,  0.1%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:  65903880k total, 24300068k used, 41603812k free,   346516k buffers
Swap: 65535992k total,        0k used, 65535992k free, 18665656k cached

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
 37976 root      20   0     0    0    0 R 99.4  0.0   2412:25 ldlm_bl_04

Jul 13 12:49:30 mu01 kernel: LustreError: 11-0: lustre-OST000a-osc-ffff88080fdad800: Communicating with 10.0.2.2@o2ib, operation obd_ping failed with -107.
Jul 13 12:49:30 mu01 kernel: Lustre: lustre-OST000a-osc-ffff88080fdad800: Connection to lustre-OST000a (at 10.0.2.2@o2ib) was lost; in progress operations using this service will wait for recovery to complete
Jul 13 12:49:30 mu01 kernel: LustreError: 167-0: lustre-OST000a-osc-ffff88080fdad800: This client was evicted by lustre-OST000a; in progress operations using this service will fail.
Jul 13 12:49:31 mu01 kernel: schedule_timeout: wrong timeout value fffffffff5c2c8c0
Jul 13 12:49:31 mu01 kernel: Pid: 4054, comm: ldlm_poold Tainted: G           ---------------  T 2.6.32-279.el6.x86_64 #1
Jul 13 12:49:31 mu01 kernel: Call Trace:
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffff814fe759&amp;gt;] ? schedule_timeout+0x2c9/0x2e0
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffffa086612b&amp;gt;] ? ldlm_pool_recalc+0x10b/0x130 [ptlrpc]
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffffa084cfb9&amp;gt;] ? ldlm_namespace_put+0x29/0x60 [ptlrpc]
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffffa08670b0&amp;gt;] ? ldlm_pools_thread_main+0x1d0/0x2f0 [ptlrpc]
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffff81060250&amp;gt;] ? default_wake_function+0x0/0x20
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffffa0866ee0&amp;gt;] ? ldlm_pools_thread_main+0x0/0x2f0 [ptlrpc]
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffff81091d66&amp;gt;] ? kthread+0x96/0xa0
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffff8100c14a&amp;gt;] ? child_rip+0xa/0x20
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffff81091cd0&amp;gt;] ? kthread+0x0/0xa0
Jul 13 12:49:31 mu01 kernel: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
Jul 13 12:49:33 mu01 kernel: Lustre: lustre-OST000a-osc-ffff88080fdad800: Connection restored to lustre-OST000a (at 10.0.2.2@o2ib)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="25745">LU-5415</key>
            <summary>High ldlm_poold load on client</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="lixi">Li Xi</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Fri, 25 Jul 2014 03:14:30 +0000</created>
                <updated>Thu, 29 Oct 2015 17:25:04 +0000</updated>
                            <resolved>Thu, 14 Aug 2014 14:05:21 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                                    <fixVersion>Lustre 2.7.0</fixVersion>
                    <fixVersion>Lustre 2.5.3</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="90025" author="lixi" created="Fri, 25 Jul 2014 03:17:05 +0000"  >&lt;p&gt;We&apos;ve seen this problem for a long time on various systems. Usually, as a workaround, we disable LRU resizing of ldlm on the client. But maybe the following patch can help.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/11227&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11227&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="90029" author="pjones" created="Fri, 25 Jul 2014 05:59:00 +0000"  >&lt;p&gt;Lai&lt;/p&gt;

&lt;p&gt;Could you please review this patch?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="90073" author="adilger" created="Fri, 25 Jul 2014 17:31:05 +0000"  >&lt;p&gt;Was this problem actually seen on Lustre 2.6/master or some other version?  There were patches from Oleg that were landed for 2.5 that addressed some problems with LDLM pools, but I&apos;m happy to see more improvements in this area. &lt;/p&gt;</comment>
                            <comment id="90104" author="adilger" created="Sat, 26 Jul 2014 01:10:10 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/6234&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/6234&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/5793&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5793&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/5624&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/5624&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="90117" author="pjones" created="Sat, 26 Jul 2014 14:55:08 +0000"  >&lt;p&gt;So the patches Andreas mentions would be included on any 2.5.x based branches.&lt;/p&gt;</comment>
                            <comment id="90118" author="lixi" created="Sat, 26 Jul 2014 15:04:30 +0000"  >&lt;p&gt;Yeah, those patches are included on the branch which has this problem. This problem is happening quite frequently.&lt;/p&gt;</comment>
                            <comment id="90160" author="laisiyao" created="Mon, 28 Jul 2014 13:56:19 +0000"  >&lt;p&gt;Peter, I&apos;ll be on vacation from tomorrow, could you reassign to others?&lt;/p&gt;</comment>
                            <comment id="90161" author="pjones" created="Mon, 28 Jul 2014 14:01:06 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please look after this patch?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="91258" author="green" created="Mon, 11 Aug 2014 04:31:43 +0000"  >&lt;p&gt;I wonder what sort of lists do you have on the client side that cause iteration of said lists to take over a second (so that the time becomes negative)? Could the problem be somewhere else and this proposed change is just papering over the real issue?&lt;/p&gt;</comment>
                            <comment id="91261" author="lixi" created="Mon, 11 Aug 2014 05:00:07 +0000"  >&lt;p&gt;Yeah, it is very possible that the patch is not fixing the root cause. It has nearly become common knowledge that LRU resizing of ldlm on the client should be disabled, otherwise there will be high ldlm load. Is there any guarantee that LRU resizing will complete in a bounded period of time? If not, then the current code has a problem anyway. I also found some other issues which had negative timeout values.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-1733&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-1733&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-4480&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-4480&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://jira.hpdd.intel.com/browse/DDN-58&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/DDN-58&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="91391" author="lixi" created="Tue, 12 Aug 2014 15:05:00 +0000"  >&lt;p&gt;Patch for b2_5:&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/11414&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11414&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="91609" author="pjones" created="Thu, 14 Aug 2014 14:05:21 +0000"  >&lt;p&gt;Landed for 2.7&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="17790">LU-2924</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="22869">LU-4536</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwsan:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15059</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>