<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:45:33 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11629] MDS panic under load - lu_context_key_get</title>
                <link>https://jira.whamcloud.com/browse/LU-11629</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When running a very recent (few days ago) copy of master under heavy load (on real hardware), we hit MDS panics relatively easily.&lt;/p&gt;

&lt;p&gt;Here&apos;s the basic crash signature:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
 [16840.816521] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
[16840.827221] IP: [&amp;lt;ffffffffc0bae433&amp;gt;] lu_context_key_get+0x13/0x30 [obdclass]
[16840.837127] PGD 0
[16840.841095] Oops: 0000 [#1] SMP
[.....]
[16841.012065] CPU: 18 PID: 145031 Comm: ldlm_cn01_053 Tainted: G OE ------------ 3.10.0-693.21.1.x3.1.9.x86_64 #1
[16841.026546] Hardware name: Intel Corporation S2600WT2R/S2600WT2R, BIOS SE5C610.86B.01.01.0021.032120170601 03/21/2017
[16841.040373] task: ffff880e3b296eb0 ti: ffff881ee0e44000 task.ti: ffff881ee0e44000
[16841.050648] RIP: 0010:[&amp;lt;ffffffffc0bae433&amp;gt;] [&amp;lt;ffffffffc0bae433&amp;gt;] lu_context_key_get+0x13/0x30 [obdclass]
[16841.063235] RSP: 0018:ffff881ee0e47a28 EFLAGS: 00010246
[16841.071023] RAX: 0000000000000014 RBX: ffff881dc15a2f40 RCX: ffff881ee0e47aac
[16841.080843] RDX: ffff881e50b05930 RSI: ffffffffc1325c40 RDI: 0000000000000000
[16841.090656] RBP: ffff881ee0e47a70 R08: ffff880fef20c000 R09: 0000000000000130
[16841.100468] R10: 0000000000000000 R11: ffff881e50b05800 R12: ffff881ee0e47aac
[16841.110290] R13: ffff881e50b05930 R14: ffff881dc15a2f40 R15: 0000000000000000
[16841.120113] FS: 0000000000000000(0000) GS:ffff88203df80000(0000) knlGS:0000000000000000
[16841.130986] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[16841.139190] CR2: 0000000000000010 CR3: 0000000fdc790000 CR4: 00000000003607e0
[16841.148961] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[16841.158694] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[16841.168389] Call Trace:
[16841.172820] [&amp;lt;ffffffffc12e7ad4&amp;gt;] ? mdt_lvbo_fill+0x74/0xa80 [mdt]
[16841.181467] [&amp;lt;ffffffffc0dc6852&amp;gt;] ldlm_server_completion_ast+0x242/0x9e0 [ptlrpc]
[16841.191573] [&amp;lt;ffffffffc0dc6610&amp;gt;] ? ldlm_server_blocking_ast+0xa40/0xa40 [ptlrpc]
[16841.201642] [&amp;lt;ffffffffc0d98748&amp;gt;] ldlm_work_cp_ast_lock+0xa8/0x1d0 [ptlrpc]
[16841.211100] [&amp;lt;ffffffffc0de062a&amp;gt;] ptlrpc_set_wait+0x7a/0x8d0 [ptlrpc]
[16841.219944] [&amp;lt;ffffffffc09ba2b8&amp;gt;] ? cfs_hash_bd_from_key+0x38/0xb0 [libcfs]
[16841.229338] [&amp;lt;ffffffff811e4d1d&amp;gt;] ? kmem_cache_alloc_node_trace+0x11d/0x210
[16841.238708] [&amp;lt;ffffffffc0b90e19&amp;gt;] ? lprocfs_counter_add+0xf9/0x160 [obdclass]
[16841.248345] [&amp;lt;ffffffffc0d986a0&amp;gt;] ? ldlm_work_gl_ast_lock+0x3a0/0x3a0 [ptlrpc]
[16841.258047] [&amp;lt;ffffffffc0dd6e80&amp;gt;] ? ptlrpc_prep_set+0xc0/0x260 [ptlrpc]
[16841.267028] [&amp;lt;ffffffffc0d9e245&amp;gt;] ldlm_run_ast_work+0xd5/0x3a0 [ptlrpc]
[16841.275975] [&amp;lt;ffffffffc0d9f7a9&amp;gt;] __ldlm_reprocess_all+0x129/0x380 [ptlrpc]
[16841.285292] [&amp;lt;ffffffffc0d9fa10&amp;gt;] ldlm_reprocess_all+0x10/0x20 [ptlrpc]
[16841.294199] [&amp;lt;ffffffffc0dc3d3e&amp;gt;] ldlm_request_cancel+0x14e/0x740 [ptlrpc]
[16841.303307] [&amp;lt;ffffffffc0dc8ada&amp;gt;] ldlm_handle_cancel+0xba/0x250 [ptlrpc]
[16841.312233] [&amp;lt;ffffffffc0dc8dc8&amp;gt;] ldlm_cancel_handler+0x158/0x590 [ptlrpc]
[16841.321356] [&amp;lt;ffffffffc0df9ccb&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
[16841.331372] [&amp;lt;ffffffffc0df6b55&amp;gt;] ? ptlrpc_wait_event+0xa5/0x360 [ptlrpc]
[16841.340401] [&amp;lt;ffffffffc0dfd5c4&amp;gt;] ptlrpc_main+0xaf4/0x1fa0 [ptlrpc]
[16841.348851] [&amp;lt;ffffffffc0dfcad0&amp;gt;] ? ptlrpc_register_service+0xf70/0xf70 [ptlrpc]
[16841.358516] [&amp;lt;ffffffff810b4031&amp;gt;] kthread+0xd1/0xe0
&#160;&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;I think there&apos;s a good chance this is related to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11483&quot; class=&quot;external-link&quot; rel=&quot;nofollow&quot;&gt;https://jira.whamcloud.com/browse/LU-11483&lt;/a&gt;, but I haven&apos;t done detailed triage yet.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;If someone from WC wants to take a look, I can make the vmcore available.&#160; (Someone from Cray will take a detailed look eventually, but we haven&apos;t had the chance yet.)&lt;/p&gt;</description>
                <environment></environment>
        <key id="53937">LU-11629</key>
            <summary>MDS panic under load - lu_context_key_get</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="paf">Patrick Farrell</reporter>
                        <labels>
                    </labels>
                <created>Tue, 6 Nov 2018 16:18:01 +0000</created>
                <updated>Wed, 7 Nov 2018 04:40:13 +0000</updated>
                            <resolved>Wed, 7 Nov 2018 04:40:13 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="236470" author="green" created="Tue, 6 Nov 2018 18:08:14 +0000"  >&lt;p&gt;I believe they are the same&lt;/p&gt;</comment>
                            <comment id="236475" author="adilger" created="Tue, 6 Nov 2018 18:23:16 +0000"  >&lt;p&gt;Please follow up in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11483&quot; title=&quot;replay-dual test_25: ofd_lvbo_init()) ASSERTION( env ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11483&quot;&gt;&lt;del&gt;LU-11483&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="236487" author="paf" created="Tue, 6 Nov 2018 20:37:37 +0000"  >&lt;p&gt;OK, thanks!&lt;/p&gt;</comment>
                            <comment id="236489" author="jmiller" created="Tue, 6 Nov 2018 20:43:08 +0000"  >&lt;p&gt;Testing mail delivery for @paf&lt;/p&gt;</comment>
                            <comment id="236501" author="pjones" created="Wed, 7 Nov 2018 00:39:38 +0000"  >&lt;p&gt;Patrick is skeptical that this is a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11483&quot; title=&quot;replay-dual test_25: ofd_lvbo_init()) ASSERTION( env ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11483&quot;&gt;&lt;del&gt;LU-11483&lt;/del&gt;&lt;/a&gt; so reopening until some testing has been run to prove/disprove this theory either way&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="236510" author="paf" created="Wed, 7 Nov 2018 03:19:29 +0000"  >&lt;p&gt;I&apos;m not skeptical of what Oleg did, I think there&apos;s some confusion here, and it would be good to get Oleg to weigh in.&lt;/p&gt;

&lt;p&gt;Essentially, exactly the same problem exists in two places in the code.&#160; One crashes with the signature given in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11483&quot; title=&quot;replay-dual test_25: ofd_lvbo_init()) ASSERTION( env ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11483&quot;&gt;&lt;del&gt;LU-11483&lt;/del&gt;&lt;/a&gt;, one is the signature reported here in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11629&quot; title=&quot;MDS panic under load - lu_context_key_get&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11629&quot;&gt;&lt;del&gt;LU-11629&lt;/del&gt;&lt;/a&gt;.&#160; It&apos;s simply that the same fix has to be applied in two places.&#160; I believe that&apos;s what Oleg was indicating by marking this as a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11483&quot; title=&quot;replay-dual test_25: ofd_lvbo_init()) ASSERTION( env ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11483&quot;&gt;&lt;del&gt;LU-11483&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="236512" author="pjones" created="Wed, 7 Nov 2018 04:40:13 +0000"  >&lt;p&gt;Ah sorry, having now seen that extra context I understand. Thanks for pointing that out!&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="53535">LU-11483</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i005u7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>