<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:21:09 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8857] sptlrpc_target_local_read_conf() missing llog context</title>
                <link>https://jira.whamcloud.com/browse/LU-8857</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Booting the MDTs at the same time, MDT0001 (on zinc2) failed to exit recovery.  The console log reports&lt;br/&gt;
Console log shows:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2016-11-15 16:29:26 [ 2960.562708] LustreError: 144169:0 (sec_config.c:1103:sptlrpc_target_local_read_conf()) missing llog context
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;about 20 seconds after Lustre reported Build Version in log.  No intervening errors.&lt;/p&gt;

&lt;p&gt;Dumped the debug logs from that MDT.  The thread that reported the error recorded the following in the debug log:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;   1 2016-11-15 16:29:26.845220 00000020:01000000:24.0::0:144169:0:(obd_config.c:1487:class_config_llog_handler()) Marker, inst_flg=0x8 mark_flg=0x1
   2 2016-11-15 16:29:26.845227 00000020:00000080:24.0::0:144169:0:(obd_config.c:1148:class_process_config()) processing cmd: cf010
   3 2016-11-15 16:29:26.845228 00000020:00000080:24.0::0:144169:0:(obd_config.c:1218:class_process_config()) marker 8 (0x1) lsh-MDT0001-mdtl lov setup
   4 2016-11-15 16:29:26.845230 00000020:01000000:24.0::0:144169:0:(obd_config.c:1562:class_config_llog_handler()) For 2.x interoperability, rename obd type from lov to lod (lsh-MDT0001)
   5 2016-11-15 16:29:26.845231 00000020:00000080:24.0::0:144169:0:(obd_config.c:1148:class_process_config()) processing cmd: cf001
   6 2016-11-15 16:29:26.845233 00000020:00000080:24.0::0:144169:0:(obd_config.c:362:class_attach()) attach type lod name: lsh-MDT0001-mdtlov uuid: lsh-MDT0001-mdtlov_UUID
   7 2016-11-15 16:29:26.857650 00000020:00000080:24.0::0:144169:0:(genops.c:371:class_newdev()) Adding new device lsh-MDT0001-mdtlov (ffff887f01a68000)
   8 2016-11-15 16:29:26.857653 00000020:00000080:24.0::0:144169:0:(obd_config.c:432:class_attach()) OBD: dev 3 attached type lod with refcount 1
   9 2016-11-15 16:29:26.857656 00000020:00000080:24.0::0:144169:0:(obd_config.c:1148:class_process_config()) processing cmd: cf003
  10 2016-11-15 16:29:26.857701 00080000:01000000:24.0::0:144169:0:(osd_handler.c:1248:osd_obd_connect()) connect #1
  11 2016-11-15 16:29:26.857703 00000020:00000080:24.0::0:144169:0:(genops.c:1167:class_connect()) connect: client lsh-MDT0001-osd_UUID, cookie 0xd50fdc70539570e9
  12 2016-11-15 16:29:26.857727 00000020:00000080:24.0::0:144169:0:(obd_config.c:552:class_setup()) finished setup of obd lsh-MDT0001-mdtlov (uuid lsh-MDT0001-mdtlov_UUID)
  13 2016-11-15 16:29:26.857730 00000020:01000000:24.0::0:144169:0:(obd_config.c:1487:class_config_llog_handler()) Marker, inst_flg=0x2 mark_flg=0x2
  14 2016-11-15 16:29:26.857731 00000020:00000080:24.0::0:144169:0:(obd_config.c:1148:class_process_config()) processing cmd: cf010
  15 2016-11-15 16:29:26.857731 00000020:00000080:24.0::0:144169:0:(obd_config.c:1218:class_process_config()) marker 8 (0x2) lsh-MDT0001-mdtl lov setup
  16 2016-11-15 16:29:26.857732 00000020:01000000:24.0::0:144169:0:(obd_config.c:1487:class_config_llog_handler()) Marker, inst_flg=0x0 mark_flg=0x1
  17 2016-11-15 16:29:26.857733 00000020:00000080:24.0::0:144169:0:(obd_config.c:1148:class_process_config()) processing cmd: cf010
  18 2016-11-15 16:29:26.857734 00000020:00000080:24.0::0:144169:0:(obd_config.c:1218:class_process_config()) marker 9 (0x1) lsh-MDT0001 add mdt
  19 2016-11-15 16:29:26.857735 00000020:00000080:24.0::0:144169:0:(obd_config.c:1148:class_process_config()) processing cmd: cf001
  20 2016-11-15 16:29:26.857735 00000020:00000080:24.0::0:144169:0:(obd_config.c:362:class_attach()) attach type mdt name: lsh-MDT0001 uuid: lsh-MDT0001_UUID
  21 2016-11-15 16:29:26.857754 00000020:00000080:24.0::0:144169:0:(genops.c:371:class_newdev()) Adding new device lsh-MDT0001 (ffff887f01a68f28)
  22 2016-11-15 16:29:26.857755 00000020:00000080:24.0::0:144169:0:(obd_config.c:432:class_attach()) OBD: dev 4 attached type mdt with refcount 1
  23 2016-11-15 16:29:26.857756 00000020:00000080:24.0::0:144169:0:(obd_config.c:1148:class_process_config()) processing cmd: cf007
  24 2016-11-15 16:29:26.857757 00000020:00000080:24.0::0:144169:0:(obd_config.c:1176:class_process_config()) mountopt: profile lsh-MDT0001 osc lsh-MDT0001-mdtlov mdc (null)
  25 2016-11-15 16:29:26.857758 00000020:01000000:24.0::0:144169:0:(obd_config.c:873:class_add_profile()) Add profile lsh-MDT0001
  26 2016-11-15 16:29:26.857760 00000020:00000080:24.0::0:144169:0:(obd_config.c:1148:class_process_config()) processing cmd: cf003
  27 2016-11-15 16:29:26.857772 00000020:01000004:24.0::0:144169:0:(obd_mount_server.c:175:server_get_mount()) get mount ffff883f70083800 from lsh-MDT0001, refs=2
  28 2016-11-15 16:29:26.857776 00000020:00000080:24.0::0:144169:0:(obd_config.c:362:class_attach()) attach type mdd name: lsh-MDD0001 uuid: lsh-MDD0001_UUID
  29 2016-11-15 16:29:26.868373 00000020:00000080:24.0::0:144169:0:(genops.c:371:class_newdev()) Adding new device lsh-MDD0001 (ffff887f01a69e50)
  30 2016-11-15 16:29:26.868376 00000020:00000080:24.0::0:144169:0:(obd_config.c:432:class_attach()) OBD: dev 5 attached type mdd with refcount 1
  31 2016-11-15 16:29:26.868397 00000004:01000000:24.0::0:144169:0:(lod_dev.c:1687:lod_obd_connect()) connect #0
  32 2016-11-15 16:29:26.868399 00000020:00000080:24.0::0:144169:0:(genops.c:1167:class_connect()) connect: client lsh-MDT0001-mdtlov_UUID, cookie 0xd50fdc70539570fe
  33 2016-11-15 16:29:26.868412 00000020:00000080:24.0::0:144169:0:(obd_config.c:552:class_setup()) finished setup of obd lsh-MDD0001 (uuid lsh-MDD0001_UUID)
  34 2016-11-15 16:29:26.868420 00000004:01000000:24.0::0:144169:0:(mdd_device.c:1178:mdd_obd_connect()) connect #0
  35 2016-11-15 16:29:26.868422 00000020:00000080:24.0::0:144169:0:(genops.c:1167:class_connect()) connect: client lsh-MDD0001_UUID, cookie 0xd50fdc7053957105
  36 2016-11-15 16:29:26.868423 00080000:01000000:24.0::0:144169:0:(osd_handler.c:1248:osd_obd_connect()) connect #2
  37 2016-11-15 16:29:26.868425 00000020:00000080:24.0::0:144169:0:(genops.c:1167:class_connect()) connect: client lsh-MDT0001-osd_UUID, cookie 0xd50fdc705395710c
  38 2016-11-15 16:29:26.920911 02000000:00020000:24.0::0:144169:0:(sec_config.c:1103:sptlrpc_target_local_read_conf()) missing llog context
  39 2016-11-15 16:29:26.931932 00000020:00000080:24.0::0:144169:0:(obd_config.c:552:class_setup()) finished setup of obd lsh-MDT0001 (uuid lsh-MDT0001_UUID)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>lustre-2.8.0_5.chaos&lt;br/&gt;
kernel-3.10.0-510.0.0.2chaos.ch6.x86_64&lt;br/&gt;
zfs-0.7.0-0.5llnl.ch6.x86_64&lt;br/&gt;
DNE file system with 16 MDTs</environment>
        <key id="41716">LU-8857</key>
            <summary>sptlrpc_target_local_read_conf() missing llog context</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="ofaaland">Olaf Faaland</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Tue, 22 Nov 2016 00:09:23 +0000</created>
                <updated>Tue, 14 Mar 2017 06:06:26 +0000</updated>
                            <resolved>Tue, 14 Mar 2017 06:06:25 +0000</resolved>
                                    <version>Lustre 2.8.0</version>
                                    <fixVersion>Lustre 2.10.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="174565" author="ofaaland" created="Tue, 22 Nov 2016 00:14:05 +0000"  >&lt;p&gt;Attached complete debug logs: dk.zinc2.1479316998.gz&lt;/p&gt;</comment>
                            <comment id="174568" author="ofaaland" created="Tue, 22 Nov 2016 00:52:07 +0000"  >&lt;p&gt;We saw the same error message earlier today on a newly formatted DNE file system (lquake), before the file system had been mounted by clients.&lt;br/&gt;
We have debug logs and can attach them if that would be helpful.&lt;/p&gt;</comment>
                            <comment id="174698" author="pjones" created="Tue, 22 Nov 2016 18:48:31 +0000"  >&lt;p&gt;Lai&lt;/p&gt;

&lt;p&gt;Could you please assist with this one?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="174754" author="ofaaland" created="Tue, 22 Nov 2016 23:45:47 +0000"  >&lt;p&gt;Just realized that 11 of the 16 MDTs reported this error.   MDT0001 was just one of them.&lt;br/&gt;
zinc&lt;span class=&quot;error&quot;&gt;&amp;#91;3-6,8,10-11,13-16&amp;#93;&lt;/span&gt;&lt;/p&gt;</comment>
                            <comment id="175249" author="ofaaland" created="Mon, 28 Nov 2016 19:08:16 +0000"  >&lt;p&gt;As indicated on 22 Nov, the &quot;missing llog context&quot; error message appeared on many nodes within the cluster.  Most of those nodes successfully exited recovery, but MDT0001 did not.&lt;/p&gt;

&lt;p&gt;So we have two symptoms:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;MDT fails to exit recovery&lt;/li&gt;
	&lt;li&gt;MDT reports &quot;missing llog context&quot; error message&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Are they related or independent?&lt;/p&gt;</comment>
                            <comment id="175251" author="ofaaland" created="Mon, 28 Nov 2016 19:11:03 +0000"  >&lt;p&gt;Increased priority and marked topllnl because this file system is down until this is resolved.  It is one of only 2 DNE test clusters available to us.&lt;/p&gt;</comment>
                            <comment id="175256" author="di.wang" created="Mon, 28 Nov 2016 19:46:16 +0000"  >&lt;p&gt;Olaf: You can ignore &quot;missing llog context&quot;, that is noise, which should be un-related with the endless recovery.  &lt;/p&gt;

&lt;p&gt;According to the debug log, it looks like MDT0001 is keep waiting for all of clients to connect. Olaf, all clients and other MDTs are up at that moment? and also it seems recovery debug mask (D_HA            0x00080000) is not enabled?&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;0010000:02000400:0.0:1479257111.133403:0:96245:0:(ldlm_lib.c:779:target_handle_reconnect()) lsh-MDT0001: Client lsh-MDT0008-mdtlov_UUID (at 172.19.3.9@o2ib600) reconnecting, waiting for 1187 clients in recovery for 0:00
00000100:02000000:0.0:1479257111.133417:0:96245:0:(import.c:1539:ptlrpc_import_recovery_state_machine()) lsh-MDT0001: Connection restored to  (at 172.19.3.9@o2ib600)
00010000:02000400:6.0:1479257111.467229:0:143823:0:(ldlm_lib.c:779:target_handle_reconnect()) lsh-MDT0001: Client lsh-MDT0005-mdtlov_UUID (at 172.19.3.6@o2ib600) reconnecting, waiting for 1187 clients in recovery for 0:00
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Olaf: could you please also post the stack trace for &quot;tgt_recover_1&quot;?  thanks.&lt;/p&gt;</comment>
                            <comment id="175303" author="ofaaland" created="Mon, 28 Nov 2016 23:18:25 +0000"  >&lt;p&gt;Di,&lt;br/&gt;
Thanks.   The servers have been restarted while I looked into the message, so I lost the state you are looking for.  I&apos;ll use this ticket for a patch to change the debug mask on the &quot;missing llog context&quot; message.&lt;/p&gt;</comment>
                            <comment id="175329" author="ofaaland" created="Tue, 29 Nov 2016 00:25:06 +0000"  >&lt;p&gt;Removed topllnl label because the log message does not reflect an actual error.&lt;/p&gt;</comment>
                            <comment id="175650" author="laisiyao" created="Wed, 30 Nov 2016 14:17:51 +0000"  >&lt;p&gt;&quot;missing log context&quot; looks to be a bug introduced in server code refactor long ago, and after I tweaked the code a bit it&apos;s reproduced, but I still need to understand the refactor more to fix it.&lt;/p&gt;</comment>
                            <comment id="175700" author="ofaaland" created="Wed, 30 Nov 2016 16:40:54 +0000"  >&lt;p&gt;Lai,&lt;br/&gt;
OK, thanks for the update.&lt;/p&gt;</comment>
                            <comment id="176408" author="gerrit" created="Sun, 4 Dec 2016 14:59:04 +0000"  >&lt;p&gt;Lai Siyao (lai.siyao@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/24119&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24119&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8857&quot; title=&quot;sptlrpc_target_local_read_conf() missing llog context&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8857&quot;&gt;&lt;del&gt;LU-8857&lt;/del&gt;&lt;/a&gt; llog: init llog context for target&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 2cd184c00ed99b250ce4ee636b78716738f29bdd&lt;/p&gt;</comment>
                            <comment id="177802" author="ofaaland" created="Thu, 15 Dec 2016 00:59:30 +0000"  >&lt;p&gt;Hi all,&lt;br/&gt;
Can we get some review of Lai&apos;s patch?&lt;br/&gt;
thanks,&lt;br/&gt;
Olaf&lt;/p&gt;</comment>
                            <comment id="177803" author="ofaaland" created="Thu, 15 Dec 2016 01:01:11 +0000"  >&lt;p&gt;Updated priority which had been set to Critical when I thought the error message was related to recovery problems, and not updated after I learned otherwise.&lt;/p&gt;</comment>
                            <comment id="188191" author="gerrit" created="Tue, 14 Mar 2017 02:58:41 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/24119/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24119/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8857&quot; title=&quot;sptlrpc_target_local_read_conf() missing llog context&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8857&quot;&gt;&lt;del&gt;LU-8857&lt;/del&gt;&lt;/a&gt; config: refactor sptlrpc config process&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: df00ee15534419070c559acdb913898fc9dc3a00&lt;/p&gt;</comment>
                            <comment id="188205" author="pjones" created="Tue, 14 Mar 2017 06:06:26 +0000"  >&lt;p&gt;Landed for 2.10&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="24136" name="dk.zinc2.1479316998.gz" size="1459116" author="ofaaland" created="Tue, 22 Nov 2016 00:14:05 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzyw8f:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>