<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:54:31 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5787] ptlrpcd_rcv loop in osc_ldlm_weigh_ast</title>
                <link>https://jira.whamcloud.com/browse/LU-5787</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt; At Cea T100 system we have an issue similar to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5781&quot; title=&quot;endless loop in osc_lock_weight()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5781&quot;&gt;&lt;del&gt;LU-5781&lt;/del&gt;&lt;/a&gt;: after a server node failover, some client nodes have the ptlrpcd_rcv thread, which uses 100% of one cpu. With perf we can see:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;      0.55%      ptlrpcd_rcv  [obdclass]               [k] cl_page_at_trusted
                |
                --- cl_page_at_trusted
                   |
                   |--97.59%-- cl_page_gang_lookup
                   |          osc_ldlm_weigh_ast
                   |          osc_cancel_for_recovery
                   |          ldlm_cancel_no_wait_policy
                   |          ldlm_prepare_lru_list
                   |          ldlm_cancel_lru_local
                   |          ldlm_replay_locks
                   |          ptlrpc_import_recovery_state_machine
                   |          ptlrpc_connect_interpret
                   |          ptlrpc_check_set
                   |          ptlrpcd_check
                   |          ptlrpcd
                   |          kthread
                   |          child_rip
                   |
                    --2.41%-- osc_ldlm_weigh_ast
                              osc_cancel_for_recovery
                              ldlm_cancel_no_wait_policy
                              ldlm_prepare_lru_list
                              ldlm_cancel_lru_local
                              ldlm_replay_locks
                              ptlrpc_import_recovery_state_machine
                              ptlrpc_connect_interpret
                              ptlrpc_check_set
                              ptlrpcd_check
                              ptlrpcd
                              kthread
                              child_rip  
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;  we have some OSCs with the following states:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;    /proc/fs/lustre/osc/ptmp2-OST0021-osc-ffff8801ff8b3800/state:current_state: CONNECTING
    /proc/fs/lustre/osc/ptmp2-OST0022-osc-ffff8801ff8b3800/state:current_state: REPLAY_LOCKS
    /proc/fs/lustre/osc/ptmp2-OST0023-osc-ffff8801ff8b3800/state:current_state: REPLAY_LOCKS
    /proc/fs/lustre/osc/ptmp2-OST0024-osc-ffff8801ff8b3800/state:current_state: CONNECTING
    /proc/fs/lustre/osc/ptmp2-OST0025-osc-ffff8801ff8b3800/state:current_state: REPLAY_LOCKS
    /proc/fs/lustre/osc/ptmp2-OST0026-osc-ffff8801ff8b3800/state:current_state: CONNECTING
    /proc/fs/lustre/osc/ptmp2-OST0027-osc-ffff8801ff8b3800/state:current_state: CONNECTING
    /proc/fs/lustre/osc/ptmp2-OST0028-osc-ffff8801ff8b3800/state:current_state: CONNECTING
    /proc/fs/lustre/osc/ptmp2-OST0029-osc-ffff8801ff8b3800/state:current_state: REPLAY_LOCKS
    /proc/fs/lustre/osc/ptmp2-OST002a-osc-ffff8801ff8b3800/state:current_state: CONNECTING
    /proc/fs/lustre/osc/ptmp2-OST002b-osc-ffff8801ff8b3800/state:current_state: REPLAY_WAIT
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;  and also, in some cases, it is possible to release the state to FULL after running&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;lctl set_param ldlm.namespaces.*.lru_size=clear&lt;br/&gt;
  or&lt;/li&gt;
	&lt;li&gt;echo 1 &amp;gt; /proc/sys/vm/drop_caches&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt; and after an NMI, the ptlrpcd_rcv stack was:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;     crash&amp;gt; bt 15400
    PID: 15400  TASK: ffff880c7c5a0100  CPU: 14  COMMAND: &quot;ptlrpcd_rcv&quot;
     #0 [ffff88088e4c7e90] crash_nmi_callback at ffffffff81030096
     #1 [ffff88088e4c7ea0] notifier_call_chain at ffffffff8152f9d5
     #2 [ffff88088e4c7ee0] atomic_notifier_call_chain at ffffffff8152fa3a
     #3 [ffff88088e4c7ef0] notify_die at ffffffff810a056e
     #4 [ffff88088e4c7f20] do_nmi at ffffffff8152d69b
     #5 [ffff88088e4c7f50] nmi at ffffffff8152cf60
        [exception RIP: cl_page_gang_lookup+292]
        RIP: ffffffffa04f18b4  RSP: ffff880c7cabb990  RFLAGS: 00000206
        RAX: 000000000000000a  RBX: 000000000000000b  RCX: 0000000000000000
        RDX: ffff880660a63da8  RSI: ffffffffa0af8740  RDI: ffff88065b15ae00
        RBP: ffff880c7cabba30   R8: 000000000000000e   R9: ffff880c7cabb950
        R10: 0000000000002362  R11: ffff88087a09e5d0  R12: ffff88065b15a800
        R13: ffff880660a63df8  R14: 000000000000000b  R15: 000000000000000e
        ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
    --- &amp;lt;NMI exception stack&amp;gt; ---
     #6 [ffff880c7cabb990] cl_page_gang_lookup at ffffffffa04f18b4 [obdclass]
     #7 [ffff880c7cabba38] osc_ldlm_weigh_ast at ffffffffa095e9b7 [osc]
     #8 [ffff880c7cabbab8] osc_cancel_for_recovery at ffffffffa094305d [osc]
     #9 [ffff880c7cabbac8] ldlm_cancel_no_wait_policy at ffffffffa0637711 [ptlrpc]
    #10 [ffff880c7cabbae8] ldlm_prepare_lru_list at ffffffffa063b61b [ptlrpc]
    #11 [ffff880c7cabbb68] ldlm_cancel_lru_local at ffffffffa063ba34 [ptlrpc]
    #12 [ffff880c7cabbb88] ldlm_replay_locks at ffffffffa063bbbc [ptlrpc]
    #13 [ffff880c7cabbc08] ptlrpc_import_recovery_state_machine at ffffffffa06844f7 [ptlrpc]
    #14 [ffff880c7cabbc68] ptlrpc_connect_interpret at ffffffffa0685659 [ptlrpc]
    #15 [ffff880c7cabbd08] ptlrpc_check_set at ffffffffa065bbc1 [ptlrpc]
    #16 [ffff880c7cabbda8] ptlrpcd_check at ffffffffa0687f9b [ptlrpc]
    #17 [ffff880c7cabbe08] ptlrpcd at ffffffffa06884bb [ptlrpc]
    #18 [ffff880c7cabbee8] kthread at ffffffff81099f56
    #19 [ffff880c7cabbf48] kernel_thread at ffffffff8100c20a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Once the root cause is understood in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5781&quot; title=&quot;endless loop in osc_lock_weight()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5781&quot;&gt;&lt;del&gt;LU-5781&lt;/del&gt;&lt;/a&gt;, we will need a version of the patch for Lustre 2.5.3&lt;/p&gt;</description>
                <environment>kernel 2.6.32-431.23.3 + bull fix&lt;br/&gt;
lustre 2.5.3 + bull fix</environment>
        <key id="27158">LU-5787</key>
            <summary>ptlrpcd_rcv loop in osc_ldlm_weigh_ast</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="4" iconUrl="https://jira.whamcloud.com/images/icons/statuses/reopened.png" description="This issue was once resolved, but the resolution was deemed incorrect. From here issues are either marked assigned or resolved.">Reopened</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="apercher">Antoine Percher</reporter>
                        <labels>
                    </labels>
                <created>Wed, 22 Oct 2014 13:28:17 +0000</created>
                <updated>Tue, 7 Jun 2016 15:38:28 +0000</updated>
                                            <version>Lustre 2.5.3</version>
                                                        <due></due>
                            <votes>1</votes>
                                    <watches>12</watches>
                                                                            <comments>
                            <comment id="97050" author="bfaccini" created="Wed, 22 Oct 2014 20:02:14 +0000"  >&lt;p&gt;Hello Antoine!&lt;br/&gt;
Looks like the problem has already been well identified in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5781&quot; title=&quot;endless loop in osc_lock_weight()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5781&quot;&gt;&lt;del&gt;LU-5781&lt;/del&gt;&lt;/a&gt;, so a patch will come up soon for master, and I presume it will likely be easy to back-port to b2_5.&lt;/p&gt;</comment>
                            <comment id="97057" author="pjones" created="Wed, 22 Oct 2014 21:26:40 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please look into this issue? Jinshan agrees that this looks like a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5781&quot; title=&quot;endless loop in osc_lock_weight()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5781&quot;&gt;&lt;del&gt;LU-5781&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="97372" author="bobijam" created="Fri, 24 Oct 2014 06:11:54 +0000"  >&lt;p&gt;dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5781&quot; title=&quot;endless loop in osc_lock_weight()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5781&quot;&gt;&lt;del&gt;LU-5781&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="99826" author="pjones" created="Fri, 21 Nov 2014 23:57:56 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;I have reopened this ticket because it is proving confusing to separate the approach needed for b2_5 (which does not contain &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3321&quot; title=&quot;2.x single thread/process throughput degraded from 1.8&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3321&quot;&gt;&lt;del&gt;LU-3321&lt;/del&gt;&lt;/a&gt;) and master (which does). Could you please advise what patches Bull require on b2_5?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="99843" author="bobijam" created="Sat, 22 Nov 2014 02:11:13 +0000"  >&lt;p&gt;b2_5 does contain &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3321&quot; title=&quot;2.x single thread/process throughput degraded from 1.8&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3321&quot;&gt;&lt;del&gt;LU-3321&lt;/del&gt;&lt;/a&gt; patch (git commit is 0a6c6fcd46a4e2eb289eff72402e34d329a63d91, which is a combination of commit 154fb1f7 from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3321&quot; title=&quot;2.x single thread/process throughput degraded from 1.8&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3321&quot;&gt;&lt;del&gt;LU-3321&lt;/del&gt;&lt;/a&gt; and commit  bfae5a4e from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4300&quot; title=&quot;ptlrpcd threads deadlocked in cl_lock_mutex_get&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4300&quot;&gt;&lt;del&gt;LU-4300&lt;/del&gt;&lt;/a&gt;).&lt;/p&gt;
</comment>
                            <comment id="99845" author="pjones" created="Sat, 22 Nov 2014 04:59:44 +0000"  >&lt;p&gt;Ah ok. So what do you advise for Bull to use on b2_5?&lt;/p&gt;</comment>
                            <comment id="99847" author="bobijam" created="Sat, 22 Nov 2014 06:03:47 +0000"  >&lt;p&gt;use backport of #12362, #12603&lt;/p&gt;</comment>
                            <comment id="100118" author="sebastien.buisson" created="Wed, 26 Nov 2014 13:29:00 +0000"  >&lt;p&gt;Bobijam,&lt;/p&gt;

&lt;p&gt;Do we also have to revert 0a6c6fcd46a4e2eb289eff72402e34d329a63d91 from b2_5?&lt;br/&gt;
BTW, it seems #12362 and #12603 do not apply cleanly on b2_5.&lt;/p&gt;

&lt;p&gt;TIA,&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="100123" author="bobijam" created="Wed, 26 Nov 2014 13:59:04 +0000"  >&lt;p&gt;no, you don&apos;t need to revert it. #12362 is the cure for the loop (#12603 is for another issue, you don&apos;t need it here), and the b2_5 port of #12362 is at &lt;a href=&quot;http://review.whamcloud.com/12859&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/12859&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="27132">LU-5781</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Wed, 26 Nov 2014 13:28:17 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10030" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic/Theme</customfieldname>
                        <customfieldvalues>
                                        <label>OSC</label>
            <label>Tera100</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwz87:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>16240</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Wed, 22 Oct 2014 13:28:17 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>