<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:24:57 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16206] PCC crashes MDS: mdt_big_xattr_get()) ASSERTION( info-&gt;mti_big_lmm_used == 0 ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-16206</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Reproducible on 2.15.1 and 2.14.0.&#160; Both clients and servers are running Ubuntu 18.04 as shown in Environment.&lt;/p&gt;

&lt;p&gt;Steps to reproduce:&lt;/p&gt;



&lt;p&gt;&lt;tt&gt;# confirm hsm is enabled&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;mds-node:~# lctl get_param mdt.lustrefs-MDT0000.hsm_control&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;mdt.lustrefs-MDT0000.hsm_control=enabled&lt;/tt&gt;&lt;/p&gt;

&lt;p&gt;&lt;tt&gt;# setup pcc on client 0&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-0:~# mkdir /pcc&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-0:~# chmod 777 /pcc /lustre&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-0:~# lhsmtool_posix --daemon --hsm-root /pcc --archive=2 /lustre &amp;lt; /dev/null &amp;gt; /tmp/copytool_log 2&amp;gt;&amp;amp;1&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-0:~# lctl pcc add /lustre /pcc -p &quot;gid={0},gid={2001} rwid=2&quot;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;# setup pcc on client 1&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-1:~# mkdir /pcc&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-1:~# chmod 777 /pcc /lustre&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-1:~# lhsmtool_posix --daemon --hsm-root /pcc --archive=3 /lustre &amp;lt; /dev/null &amp;gt; /tmp/copytool_log 2&amp;gt;&amp;amp;1&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-1:~# lctl pcc add /lustre /pcc -p &quot;gid={0},gid={2001} rwid=3&quot;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;# create file on client 0 and confirm in-cache&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-0:~# echo &quot;test&quot; &amp;gt; /lustre/test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-0:~# lfs pcc state /lustre/test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;file: /lustre/test, type: readwrite, PCC file: /pcc/0001/0000/0402/0000/0002/0000/0x200000402:0x1:0x0, user number: 0, flags: 0&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;# read file from client 1&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-1:~# lfs pcc state /lustre/test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;file: /lustre/test, type: none&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-1:~# cat /lustre/test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;cat: /lustre/test: No data available&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-1:~# cat /lustre/test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-1:~# lfs pcc state /lustre/test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;file: /lustre/test, type: none&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;# check pcc state, and attempt to attach again on client 0&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-0:~# lfs pcc state /lustre/test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;file: /lustre/test, type: none&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-0:~# lfs pcc attach -i 2 /lustre/test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;^C^C^C^C^C^C^C^C^C &#160; &amp;lt;---- hang&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;# while client 0 is hanging, check state on client 1&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;client-1:~# lfs pcc state /lustre/test&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;^C^C^C^C &#160;&amp;lt;---- hang&lt;/tt&gt;&lt;/p&gt;

&lt;p&gt;Minutes later things resolve and the stuck command lines return. &#160;Examining the MDS, it crashed and rebooted. &#160;Relevant&lt;br/&gt;
output from dmesg:&lt;/p&gt;

&lt;p&gt;&lt;tt&gt;[ 3266.211270] LustreError: 11458:0:(mdt_handler.c:960:mdt_big_xattr_get()) ASSERTION( info-&amp;gt;mti_big_lmm_used == 0 ) failed:&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.217023] LustreError: 11458:0:(mdt_handler.c:960:mdt_big_xattr_get()) LBUG&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.220653] Pid: 11458, comm: mdt_rdpg02_001 5.4.0-1091-azure #96~18.04.1-Ubuntu SMP Tue Aug 30 19:15:32 UTC 2022&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.220653] Call Trace TBD:&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.220654] Kernel panic - not syncing: LBUG&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.222778] CPU: 8 PID: 11458 Comm: mdt_rdpg02_001 Kdump: loaded Tainted: P &#160; &#160; &#160; &#160; &#160; OE &#160; &#160; 5.4.0-1091-azure #96~18.04.1-Ubuntu&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090008 &#160;12/07/2018&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] Call Trace:&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;dump_stack+0x57/0x6d&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;panic+0xf8/0x2d4&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;lbug_with_loc+0x89/0x2c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;mdt_big_xattr_get+0x398/0x8b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;? mdd_read_unlock+0x2d/0xc0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;? mdd_readpage+0x1919/0x1ed0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;__mdt_stripe_get+0x1d4/0x430 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;mdt_attr_get_complex+0x56e/0x1af0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;mdt_mfd_close+0x2062/0x41c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;? lustre_msg_buf+0x17/0x50 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;? __req_capsule_offset+0x5ae/0x6e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.224582] &#160;mdt_close_internal+0x1f0/0x250 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] &#160;mdt_close+0x483/0x13f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] &#160;tgt_request_handle+0xc9a/0x1950 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] &#160;? lustre_msg_get_transno+0x22/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] &#160;ptlrpc_register_service+0x25e6/0x4610 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] &#160;? __switch_to_asm+0x34/0x70&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] &#160;kthread+0x121/0x140&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] &#160;? ptlrpc_register_service+0x1590/0x4610 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] &#160;? kthread_park+0x90/0x90&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] &#160;ret_from_fork+0x35/0x40&lt;/tt&gt;&lt;br/&gt;
&lt;tt&gt;[ 3266.259003] Kernel Offset: 0x1be00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)&lt;/tt&gt;&lt;/p&gt;</description>
                <environment>Linux 5.4.0-1091-azure #96~18.04.1-Ubuntu SMP Tue Aug 30 19:15:32 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux&lt;br/&gt;
</environment>
        <key id="72647">LU-16206</key>
            <summary>PCC crashes MDS: mdt_big_xattr_get()) ASSERTION( info-&gt;mti_big_lmm_used == 0 ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="elliswilson">Ellis Wilson</reporter>
                        <labels>
                    </labels>
                <created>Tue, 4 Oct 2022 16:08:01 +0000</created>
                <updated>Tue, 4 Oct 2022 16:10:42 +0000</updated>
                                            <version>Lustre 2.14.0</version>
                    <version>Lustre 2.15.1</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>1</watches>
                                                                            <comments>
                            <comment id="348658" author="JIRAUSER17317" created="Tue, 4 Oct 2022 16:10:42 +0000"  >&lt;p&gt;I&apos;ve linked three potentially related bugs.&#160; The last one has a description that&apos;s particularly enlightening:&lt;/p&gt;

&lt;p&gt;&quot;This is result of inappropriate usage of mti_big_lmm buffer in various places. Originally it was introduced to be used for getting big LOV/LMV EA and passing them to reply buffers. Meanwhile it is widely used now for internal server needs. These cases should be distinguished and if there is no intention to return EA in reply then flag {{mti_big_lmm_used}} should not be set. Maybe it is worth to rename it as&#160;&lt;tt&gt;mti_big_lmm_keep&lt;/tt&gt;&#160;to mark that is to be kept until reply is packed.&quot;&lt;br/&gt;
&#160;&lt;br/&gt;
This aligns with a comment about the non-internal version of get_stripe:&lt;/p&gt;

&lt;p&gt;&lt;tt&gt;&#160; LASSERT(!info-&amp;gt;mti_big_lmm_used);&lt;/tt&gt;&lt;/p&gt;

&lt;p&gt;&#160; &#160; rc = __mdt_stripe_get(info, o, ma, name);&lt;br/&gt;
&#160; &#160; /* since big_lmm is always used here, clear &apos;used&apos; flag to avoid&lt;br/&gt;
&#160; &#160; &#160;* assertion in mdt_big_xattr_get().&lt;br/&gt;
&#160; &#160; &#160;*/&lt;tt&gt;&#160; &#160; info-&amp;gt;mti_big_lmm_used = 0;&lt;/tt&gt;&lt;br/&gt;
&#160;&lt;br/&gt;
I wonder if a codepath is being tickled that is (ab)using mti_big_lmm_used in a similar fashion that&apos;s not covered like this code path is.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="60108">LU-13816</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="59318">LU-13599</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="59397">LU-13615</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i031z3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>