<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:48:27 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11961] nodemap_config_set_active()) ASSERTION( config-&gt;nmc_default_nodemap ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-11961</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Looks like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8824&quot; title=&quot;sanity-sec test_9: ASSERTION( config-&amp;gt;nmc_default_nodemap )&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8824&quot;&gt;&lt;del&gt;LU-8824&lt;/del&gt;&lt;/a&gt; came back, this was reported in latest master testing:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 4228.011703] Lustre: DEBUG MARKER: == sanity-sec test 26: test transferring very large nodemap ========================================== 06:23:44 (1549952624)
[ 4271.913653] LustreError: 11659:0:(nodemap_handler.c:1663:nodemap_config_set_active()) ASSERTION( config-&amp;gt;nmc_default_nodemap ) failed: 
[ 4271.915235] LustreError: 11659:0:(nodemap_handler.c:1663:nodemap_config_set_active()) LBUG
[ 4271.916117] Pid: 11659, comm: ll_cfg_requeue 3.10.0-957.1.3.el7_lustre.x86_64 #1 SMP Wed Feb 6 06:53:01 UTC 2019
[ 4271.917149] Call Trace:
[ 4271.917451]  [&amp;lt;ffffffffc09417cc&amp;gt;] libcfs_call_trace+0x8c/0xc0 [libcfs]
[ 4271.918283]  [&amp;lt;ffffffffc094187c&amp;gt;] lbug_with_loc+0x4c/0xa0 [libcfs]
[ 4271.918961]  [&amp;lt;ffffffffc0e0a6f7&amp;gt;] nodemap_config_set_active+0x2a7/0x2e0 [ptlrpc]
[ 4271.920041]  [&amp;lt;ffffffffc0e13498&amp;gt;] nodemap_config_set_active_mgc+0x38/0x1e0 [ptlrpc]
[ 4271.920969]  [&amp;lt;ffffffffc0cf2e12&amp;gt;] mgc_process_recover_nodemap_log+0x352/0xe40 [mgc]
[ 4271.921830]  [&amp;lt;ffffffffc0cf588e&amp;gt;] mgc_process_log+0x7be/0x8f0 [mgc]
[ 4271.922527]  [&amp;lt;ffffffffc0cf7938&amp;gt;] mgc_requeue_thread+0x2c8/0x870 [mgc]
[ 4271.923365]  [&amp;lt;ffffffffa22c1c31&amp;gt;] kthread+0xd1/0xe0
[ 4271.923935]  [&amp;lt;ffffffffa2974c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[ 4271.924670]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/a68fa72d-794e-45bb-ac5b-114917878f9c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/a68fa72d-794e-45bb-ac5b-114917878f9c&lt;/a&gt;&lt;/p&gt;</description>
                <environment></environment>
        <key id="54866">LU-11961</key>
            <summary>nodemap_config_set_active()) ASSERTION( config-&gt;nmc_default_nodemap ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="sebastien">Sebastien Buisson</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                    </labels>
                <created>Tue, 12 Feb 2019 17:14:19 +0000</created>
                <updated>Wed, 17 Feb 2021 21:18:38 +0000</updated>
                            <resolved>Wed, 17 Feb 2021 21:18:38 +0000</resolved>
                                    <version>Lustre 2.13.0</version>
                                    <fixVersion>Lustre 2.14.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="241776" author="green" created="Tue, 12 Feb 2019 18:17:28 +0000"  >&lt;p&gt;There was another crash in b2_10 in January: &lt;a href=&quot;https://testing.whamcloud.com/test_sessions/2988eeb4-34a4-4ba9-8a21-048b8394097d&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/2988eeb4-34a4-4ba9-8a21-048b8394097d&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;So this likely means the issue is not caused by a recent landing.&lt;/p&gt;</comment>
                            <comment id="241868" author="sebastien" created="Wed, 13 Feb 2019 15:24:55 +0000"  >&lt;p&gt;I do not think the origin of the problem is the same as in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8824&quot; title=&quot;sanity-sec test_9: ASSERTION( config-&amp;gt;nmc_default_nodemap )&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8824&quot;&gt;&lt;del&gt;LU-8824&lt;/del&gt;&lt;/a&gt;. In this ticket, the issue was happening only on ZFS, and was due to the fact that ZFS index files return the keys in hash sorted order, instead of key sorted order. Kit made a patch to make sure create records are loaded first, by doing a 2-pass load.&lt;/p&gt;

&lt;p&gt;Here the issue happened with ldiskfs, so there might be another issue. In the 2.10 branch occurrence, we have the following messages:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[12153.223611] Lustre: DEBUG MARKER: == sanity-sec test 26: test transferring very large nodemap ========================================== 03:21:40 (1548300100)
[12155.289243] Lustre: 1848:0:(nodemap_handler.c:1076:nodemap_create()) adding nodemap &apos;c291&apos; to config without default nodemap
[12155.291275] Lustre: 1848:0:(nodemap_handler.c:1076:nodemap_create()) Skipped 25 previous similar messages
[12254.129250] LustreError: 1848:0:(nodemap_handler.c:1524:nodemap_config_set_active()) ASSERTION( config-&amp;gt;nmc_default_nodemap ) failed: 
[12254.131468] LustreError: 1848:0:(nodemap_handler.c:1524:nodemap_config_set_active()) LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So this is yet another hint at the fact that the default nodemap is not there yet. Although it does no harm to add regular nodemap entries without having a default one in the config, this situation lasts for too long and we end up trying to activate the nodemap with still no default nodemap entry.&lt;/p&gt;

&lt;p&gt;As it is very difficult to investigate this issue without debug logs, I will push a debug patch that returns an error instead of asserting.&lt;/p&gt;</comment>
                            <comment id="241869" author="gerrit" created="Wed, 13 Feb 2019 15:25:35 +0000"  >&lt;p&gt;Sebastien Buisson (sbuisson@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34244&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34244&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11961&quot; title=&quot;nodemap_config_set_active()) ASSERTION( config-&amp;gt;nmc_default_nodemap ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11961&quot;&gt;&lt;del&gt;LU-11961&lt;/del&gt;&lt;/a&gt; debug: debug patch for default nodemap&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 08fb1943e6726a8a554018b407d459a54d3eacc6&lt;/p&gt;</comment>
                            <comment id="241872" author="sebastien" created="Wed, 13 Feb 2019 15:47:07 +0000"  >&lt;p&gt;I have noticed that nmc_default_nodemap is modified outside of nodemap_create(). Not sure that it is causing the problem reported in this ticket, but it should be fixed anyway.&lt;/p&gt;</comment>
                            <comment id="241873" author="gerrit" created="Wed, 13 Feb 2019 15:47:21 +0000"  >&lt;p&gt;Sebastien Buisson (sbuisson@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34245&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34245&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11961&quot; title=&quot;nodemap_config_set_active()) ASSERTION( config-&amp;gt;nmc_default_nodemap ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11961&quot;&gt;&lt;del&gt;LU-11961&lt;/del&gt;&lt;/a&gt; nodemap: nodemap_create() handles default nodemap&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 6d8057712ecac178f7edfebcd838faa8e44b86d4&lt;/p&gt;</comment>
                            <comment id="262552" author="jfilizetti" created="Tue, 4 Feb 2020 16:21:48 +0000"  >&lt;p&gt;&lt;tt&gt;I don&apos;t think it&apos;s related but shouldn&apos;t the LASSERT(active_config != config) in nodemap_config_set_active come after the lock?&lt;/tt&gt;&lt;/p&gt;</comment>
                            <comment id="263285" author="gerrit" created="Fri, 14 Feb 2020 05:49:22 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/34245/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34245/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11961&quot; title=&quot;nodemap_config_set_active()) ASSERTION( config-&amp;gt;nmc_default_nodemap ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11961&quot;&gt;&lt;del&gt;LU-11961&lt;/del&gt;&lt;/a&gt; nodemap: nodemap_create() handles default nodemap&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: ebea7c87f758d843cf71fb5873b2955f6fbf9044&lt;/p&gt;</comment>
                            <comment id="263340" author="pjones" created="Fri, 14 Feb 2020 13:31:27 +0000"  >&lt;p&gt;Is work still ongoing under this ticket?&lt;/p&gt;</comment>
                            <comment id="263354" author="sebastien" created="Fri, 14 Feb 2020 14:20:46 +0000"  >&lt;p&gt;The patch that landed might not be enough to fix the issue reported here. But it has been a while since I have seen this assertion, so I am not sure if it is still occurring on master.&lt;/p&gt;</comment>
                            <comment id="266834" author="bzzz" created="Sat, 4 Apr 2020 08:18:03 +0000"  >&lt;p&gt;might be related?&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Lustre: DEBUG MARKER: == sanity test 60c: unlink file when mds full ======================================================== 22:23:16 (1586020996)
BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
PGD 27f329067 P4D 27f329067 PUD 231175067 PMD 0 
Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
CPU: 1 PID: 9139 Comm: ll_cfg_requeue Tainted: P        W  O   X --------- ---  4.18.0 #4
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
RIP: 0010:nodemap_config_set_active_mgc+0x77/0x1d0 [ptlrpc]
Code: e8 4e 5d 10 e1 48 8b 05 c7 4f 0b 00 48 8d 58 f0 48 3d 60 ea 69 a0 0f 84 92 00 00 00 49 bc 01 00 00 00 47 04 00 00 48 8b 73 08 &amp;lt;48&amp;gt; 8b 6e 08 48 85 ed 74 39 48 8b 45 08 f6 00 02 75 30 48 c7 c7 a0
RSP: 0018:ffff880288eabd80 EFLAGS: 00010287
RAX: ffff88027c4c0c50 RBX: ffff88027c4c0c40 RCX: 0000000000000001
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000282
RBP: ffff880254872000 R08: 0000000000000000 R09: ffff880258d3e600
R10: 0000000000000000 R11: 0000000000000000 R12: 0000044700000001
R13: ffff88026b8b4588 R14: ffff8802a6122ec0 R15: ffff8802a6122e00
FS:  0000000000000000(0000) GS:ffff8802b2200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000008 CR3: 000000024dd81000 CR4: 00000000000006a0
Call Trace:
 mgc_process_log+0x266c/0x26c0 [mgc]
 mgc_requeue_thread+0x2ad/0x720 [mgc]
 ? wait_woken+0x90/0x90
 kthread+0x105/0x140
 ? mgc_process_config+0x17c0/0x17c0 [mgc]
 ? kthread_flush_work_fn+0x10/0x10
 ret_from_fork+0x24/0x30
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="289426" author="jamesanunez" created="Wed, 13 Jan 2021 17:21:30 +0000"  >&lt;p&gt;I don&apos;t see the assertion, but is the crash at &lt;a href=&quot;https://testing.whamcloud.com/test_sets/caac4a91-55b0-45ab-af99-356db88af611&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/caac4a91-55b0-45ab-af99-356db88af611&lt;/a&gt; the same or related to this ticket?&lt;/p&gt;

&lt;p&gt;On the OSS, &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[74388.707838] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
[74388.709261] PGD 0 P4D 0 
[74388.709696] Oops: 0000 [#1] SMP PTI
[74388.710293] CPU: 0 PID: 7056 Comm: ll_cfg_requeue Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-240.1.1.el8_lustre.x86_64 #1
[74388.712339] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[74388.713761] RIP: 0010:nodemap_config_set_active_mgc+0x92/0x210 [ptlrpc]
[74388.714844] Code: 00 48 8d 58 f0 48 3d 40 ac 22 c1 0f 84 9f 00 00 00 49 c7 c7 a0 2f 25 c1 49 c7 c6 c8 b8 1d c1 49 c7 c5 30 17 18 c1 48 8b 73 08 &amp;lt;4c&amp;gt; 8b 66 08 4d 85 e4 74 3a 49 8b 54 24 08 f6 02 02 75 30 48 c7 c7
[74388.717857] RSP: 0018:ffffa43f40b8fd10 EFLAGS: 00010287
[74388.718712] RAX: ffff8e2db75694d0 RBX: ffff8e2db75694c0 RCX: 0000000000000000
[74388.719871] RDX: ffff8e2d8ed10000 RSI: 0000000000000000 RDI: ffffffffc122ac20
[74388.721023] RBP: ffff8e2dac4c1500 R08: ffff8e2dbda2f160 R09: ffff8e2d4fc02a00
[74388.722184] R10: ffff8e2d88aa3000 R11: 0000000000000001 R12: ffff8e2d77ef4800
[74388.723342] R13: ffffffffc1181730 R14: ffffffffc11db8c8 R15: ffffffffc1252fa0
[74388.724496] FS:  0000000000000000(0000) GS:ffff8e2dbda00000(0000) knlGS:0000000000000000
[74388.725804] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[74388.726748] CR2: 0000000000000008 CR3: 0000000078c0a006 CR4: 00000000000606f0
[74388.727905] Call Trace:
[74388.728378]  mgc_process_recover_nodemap_log+0xe14/0xeb0 [mgc]
[74388.729344]  mgc_process_log+0x7a7/0x840 [mgc]
[74388.730088]  mgc_requeue_thread+0x29a/0x720 [mgc]
[74388.730896]  ? finish_wait+0x80/0x80
[74388.731494]  ? mgc_process_config+0xe10/0xe10 [mgc]
[74388.732314]  kthread+0x112/0x130
[74388.732869]  ? kthread_flush_work_fn+0x10/0x10
[74388.733627]  ret_from_fork+0x35/0x40
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

</comment>
                            <comment id="289428" author="sebastien" created="Wed, 13 Jan 2021 17:47:08 +0000"  >&lt;p&gt;I am not sure it is the same issue, but it would need to have a proper dump in order to investigate the origin of the NULL pointer with the &lt;tt&gt;crash&lt;/tt&gt; utility.&lt;br/&gt;
Does Maloo save the crash file somewhere?&lt;/p&gt;</comment>
                            <comment id="292230" author="pjones" created="Wed, 17 Feb 2021 21:18:38 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00bj3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>