<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:00:35 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13356] lctl conf_param hung on the MGS node</title>
                <link>https://jira.whamcloud.com/browse/LU-13356</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;From vmcore&lt;/p&gt;

&lt;p&gt;The lctl thread was sleeping, waiting for the lock to be granted&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;PID: 26309  TASK: ffff9730a36b1040  CPU: 6   COMMAND: &quot;lctl&quot;
 #0 [ffff973084edfa08] __schedule at ffffffffaf369b97
 #1 [ffff973084edfa98] schedule at ffffffffaf36a099
 #2 [ffff973084edfaa8] ldlm_completion_ast at ffffffffc1582d45 [ptlrpc]
 #3 [ffff973084edfb50] mgs_completion_ast_generic at ffffffffc141f76c [mgs]
 #4 [ffff973084edfb98] mgs_completion_ast_config at ffffffffc141f983 [mgs]
 #5 [ffff973084edfba8] ldlm_cli_enqueue_local at ffffffffc1583ecc [ptlrpc]
 #6 [ffff973084edfc48] mgs_revoke_lock at ffffffffc14243b4 [mgs]
 #7 [ffff973084edfcf0] mgs_set_param at ffffffffc1441826 [mgs]
 #8 [ffff973084edfd50] mgs_iocontrol at ffffffffc14271ca [mgs]
 #9 [ffff973084edfdd0] class_handle_ioctl at ffffffffc10a40cd [obdclass]
#10 [ffff973084edfe60] obd_class_ioctl at ffffffffc10a46d2 [obdclass]
#11 [ffff973084edfe80] do_vfs_ioctl at ffffffffaee56490
#12 [ffff973084edff00] sys_ioctl at ffffffffaee56731
#13 [ffff973084edff50] system_call_fastpath at ffffffffaf376ddb
 &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash-7.2.5_new&amp;gt; ldlm_resource 0xffff9730f7a6ab40
struct ldlm_resource {
  lr_ns_bucket = 0xffff9730a7f8cb18,
  lr_hash = {
    next = 0x0,
    pprev = 0xffff9730f8d22608
  },
  lr_refcount = {
    counter = 295
  },
  lr_lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0
          }
        }
      }
    }
  },
  lr_granted = {
    next = 0xffff97308638c720,
    prev = 0xffff973086a542a0
  },
  lr_waiting = {
    next = 0xffff973085da84e0,
    prev = 0xffff973082f21920
  },
  lr_enqueueing = {
    next = 0xffff9730f7a6ab80,
    prev = 0xffff9730f7a6ab80
  },
  lr_name = {
    name = {3546639893419028083, 0, 0, 0}
  },
  {
    lr_itree = 0x0,
    lr_ibits_queues = 0x0
  },
  {
    lr_contention_time = 0,
    lr_lvb_inode = 0x0
  },
  lr_type = LDLM_PLAIN,
  lr_lvb_len = 0,
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The wait is infinite.&lt;br/&gt;
 Here are four clients that conflict with the lock&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash-7.2.5_new&amp;gt; obd_export.exp_client_uuid 0xffff973085d63000
  exp_client_uuid = {
    uuid = &quot;bb972e10-11bc-7387-336f-6a82a0e0dd52\000\000\000&quot;
  }
crash-7.2.5_new&amp;gt; obd_export.exp_client_uuid 0xffff973085d63400
  exp_client_uuid = {
    uuid = &quot;5915d2ba-94aa-bb2e-5b88-144f699f7fa1\000\000\000&quot;
  }
crash-7.2.5_new&amp;gt; obd_export.exp_client_uuid 0xffff973085d61800
  exp_client_uuid = {
    uuid = &quot;9eeccfff-2a06-f62b-6132-9799e0bcd8aa\000\000\000&quot;
  }
crash-7.2.5_new&amp;gt; obd_export.exp_client_uuid 0xffff973085d66400
  exp_client_uuid = {
    uuid = &quot;b5a63e29-e36a-ea42-6e59-5387ded252b0\000\000\000&quot;
  }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;It looks like the problem started with network errors&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 2031.028363] LNet: 30781:0:(lib-msg.c:703:lnet_attempt_msg_resend()) msg 0@&amp;lt;0:0&amp;gt;-&amp;gt;10.10.100.6@o2ib3 exceeded retry count 3
[ 2039.570481] LustreError: 166-1: MGC10.10.100.3@o2ib3: Connection to MGS (at 10.10.100.3@o2ib3) was lost; in progress operations using this service will fail
[ 2039.586364] LustreError: 19280:0:(ldlm_request.c:147:ldlm_expired_completion_wait()) ### lock timed out (enqueued at 1581103875, 780s ago), entering recovery for MGS@10.10.100.3@o2ib3 ns: MGC10.10.100.3@o2ib3 lock: ffff9711df178000/0x5c3c0316dd62b431 lrc: 4/1,0 mode: --/CR res: [0x3138323131786e73:0x0:0x0].0x0 rrc: 2 type: PLN flags: 0x1000000000000 nid: local remote: 0x5c3c0316dd62b438 expref: -99 pid: 19280 timeout: 0 lvb_type: 0
[ 2039.628629] Lustre: MGS: Received new LWP connection from 10.10.100.3@o2ib3, removing former export from same NID
[ 2039.639855] Lustre: Skipped 8 previous similar messages
[ 2039.646053] Lustre: MGS: Connection restored to 56d1a214-8cb4-e698-a1b5-8ec5fd85505f (at 10.10.100.3@o2ib3)
[ 2039.656802] Lustre: Skipped 7 previous similar messages
[ 2039.663325] LustreError: 31823:0:(ldlm_resource.c:1159:ldlm_resource_complain()) MGC10.10.100.3@o2ib3: namespace resource [0x3138323131786e73:0x0:0x0].0x0 (ffff9731734a8d80) refcount nonzero (1) after lock cleanup; forcing cleanup.
[ 2039.663336] LustreError: 19280:0:(mgc_request.c:599:do_requeue()) failed processing log: -5
[ 2039.695497] Lustre: 31823:0:(ldlm_resource.c:1772:ldlm_resource_dump()) --- Resource: [0x3138323131786e73:0x0:0x0].0x0 (ffff9731734a8d80) refcount = 2
[ 2039.711112] Lustre: 31823:0:(ldlm_resource.c:1789:ldlm_resource_dump()) Waiting locks:
[ 2039.720115] Lustre: 31823:0:(ldlm_resource.c:1791:ldlm_resource_dump()) ### ### ns: ?? lock: ffff9711df178000/0x5c3c0316dd62b431 lrc: 2/0,0 mode: --/CR res: ?? rrc=?? type: ??? flags: 0x1106400000000 nid: local remote: 0x5c3c0316dd62b438 expref: -99 pid: 19280 timeout: 0 lvb_type: 0

....
|00010000:02000400:6.0:1581107912.144637:0:19276:0:(ldlm_lib.c:1162:target_handle_connect()) MGS: Received new LWP connection from 162@gni99, removing former export from same NID|
|00010000:00080000:6.0:1581107912.144640:0:19276:0:(ldlm_lib.c:1242:target_handle_connect()) MGS: connection from b5a63e29-e36a-ea42-6e59-5387ded252b0@162@gni99 t0 exp ffff973085d66400 cur 1581107912 last 1581107912|
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;MGC uses OBD_CONNECT_MNE_SWAB flag.&lt;br/&gt;
 The root cause of this problem is that the flag OBD_CONNECT_MNE_SWAB has the same value as OBD_CONNECT_MDS_MDS. OBD_CONNECT_MNE_SWAB was used for 2.2 clients for MNE swabbing. The OBD_CONNECT_MDS_MDS flag is used to skip export failure during reconnect for MDS-MDS interaction. Locks for MDS-MDS are not added to the waiting_locks_list because there is no eviction, and so on. This leads to a situation where the MGS cannot cancel locks for a client if that client doesn&apos;t receive/respond to a blocking AST.&lt;/p&gt;</description>
                <environment></environment>
        <key id="58344">LU-13356</key>
            <summary>lctl conf_param hung on the MGS node</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="aboyko">Alexander Boyko</assignee>
                                    <reporter username="aboyko">Alexander Boyko</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Wed, 11 Mar 2020 11:15:51 +0000</created>
                <updated>Fri, 22 Sep 2023 22:25:15 +0000</updated>
                            <resolved>Tue, 14 Apr 2020 13:30:58 +0000</resolved>
                                    <version>Lustre 2.14.0</version>
                    <version>Lustre 2.12.5</version>
                                    <fixVersion>Lustre 2.14.0</fixVersion>
                    <fixVersion>Lustre 2.12.9</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="265085" author="gerrit" created="Wed, 11 Mar 2020 11:17:13 +0000"  >&lt;p&gt;Alexander Boyko (c17825@cray.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/37880&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37880&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13356&quot; title=&quot;lctl conf_param hung on the MGS node&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13356&quot;&gt;&lt;del&gt;LU-13356&lt;/del&gt;&lt;/a&gt; client: don&apos;t use OBD_CONNECT_MNE_SWAB&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 0b965f10ebc350fb9c083f415178e787c7996bbe&lt;/p&gt;</comment>
                            <comment id="267514" author="gerrit" created="Tue, 14 Apr 2020 08:09:43 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/37880/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37880/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13356&quot; title=&quot;lctl conf_param hung on the MGS node&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13356&quot;&gt;&lt;del&gt;LU-13356&lt;/del&gt;&lt;/a&gt; client: don&apos;t use OBD_CONNECT_MNE_SWAB&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 3fe77a129e131014ff654bde616a62a1e243e322&lt;/p&gt;</comment>
                            <comment id="267554" author="pjones" created="Tue, 14 Apr 2020 13:30:58 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                            <comment id="281192" author="degremoa" created="Thu, 1 Oct 2020 11:32:24 +0000"  >&lt;p&gt;If I understand correctly, that means that you could not do an IR if one client is not responding.&lt;/p&gt;

&lt;p&gt;It seems this problem is important enough to be backported to 2.12 LTS, no?&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="282492" author="dauchy" created="Fri, 16 Oct 2020 23:09:52 +0000"  >&lt;p&gt;We also hit this on 2.12, and would benefit from a backport.&#160; Chances are good there is at least one client not responding somewhere on a large system.&lt;/p&gt;</comment>
                            <comment id="290239" author="gerrit" created="Mon, 25 Jan 2021 08:42:28 +0000"  >&lt;p&gt;Etienne AUJAMES (eaujames@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/41309&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41309&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13356&quot; title=&quot;lctl conf_param hung on the MGS node&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13356&quot;&gt;&lt;del&gt;LU-13356&lt;/del&gt;&lt;/a&gt; client: don&apos;t use OBD_CONNECT_MNE_SWAB&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: bf855c643c4c89ac57841e80705ef617cf65e02b&lt;/p&gt;</comment>
                            <comment id="290244" author="eaujames" created="Mon, 25 Jan 2021 08:53:59 +0000"  >&lt;p&gt;Hello,&lt;/p&gt;

&lt;p&gt;We hit this issue in production. So I backported the patch &lt;a href=&quot;https://review.whamcloud.com/37880/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37880/&lt;/a&gt; on b2_12.&lt;/p&gt;

&lt;p&gt;I am aware this patch removes support for OBD_CONNECT_MNE_SWAB, so I don&apos;t expect it to land on b2_12.&lt;br/&gt;
But it seems important enough to be integrated into our version of Lustre.&lt;/p&gt;</comment>
                            <comment id="333843" author="gerrit" created="Thu, 5 May 2022 06:10:15 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/41309/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41309/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13356&quot; title=&quot;lctl conf_param hung on the MGS node&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13356&quot;&gt;&lt;del&gt;LU-13356&lt;/del&gt;&lt;/a&gt; client: don&apos;t use OBD_CONNECT_MNE_SWAB&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 337b1d1bb301725b91380326985af52a5bede3a1&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="50844">LU-10674</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="68047">LU-15453</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="54950">LU-11990</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="56858">LU-12735</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="68591">LU-15539</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10030" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic/Theme</customfieldname>
                        <customfieldvalues>
                                        <label>LTS12</label>
            <label>mgs</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00vbj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>