<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:32:46 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-3306] layout.c:1946:__req_capsule_get()) @@@ Wrong  buffer for field `name&apos; (6 of 8) in format `LDLM_INTENT_OPEN&apos;: 3 vs. 0 (client)</title>
                <link>https://jira.whamcloud.com/browse/LU-3306</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Fresh master checkout, hit this in racer:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[61845.122433] LustreError: 7885:0:(pack_generic.c:770:lustre_msg_string()) can&apos;t unpack short string in msg ffffc90006c54ff0 buffer[6] len 3: strlen 1
[61845.131556] LustreError: 7885:0:(layout.c:1946:__req_capsule_get()) @@@ Wrong buffer for field `name&apos; (6 of 8) in format `LDLM_INTENT_OPEN&apos;: 3 vs. 0 (client)
[61845.131557]   req@ffff88003c01e7f0 x1434586131710596/t0(0) o101-&amp;gt;13783025-666d-77a1-7c55-b4fef65827f1@0@lo:0/0 lens 576/0 e 0 to 0 dl 1368130054 ref 1 fl Interpret:/0/ffffffff rc 0/-1
[61845.133834] LustreError: 7885:0:(mdt_handler.c:1727:mdt_reint_internal()) Can&apos;t unpack reint, rc -14
[61845.134310] LustreError: 7885:0:(lustre_mdt.h:66:err_serious()) ASSERTION( -rc &amp;lt; ESERIOUS ) failed: 
[61845.134747] LustreError: 7885:0:(lustre_mdt.h:66:err_serious()) LBUG
[61845.134992] Pid: 7885, comm: mdt00_008
[61845.135199] 
[61845.135199] Call Trace:
[61845.135558]  [&amp;lt;ffffffffa0af28a5&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
[61845.135817]  [&amp;lt;ffffffffa0af2ea7&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
[61845.136103]  [&amp;lt;ffffffffa05c32eb&amp;gt;] mdt_enqueue+0x10b/0x110 [mdt]
[61845.140089]  [&amp;lt;ffffffffa05b4d28&amp;gt;] mdt_handle_common+0x648/0x1660 [mdt]
[61845.140375]  [&amp;lt;ffffffffa05f0f25&amp;gt;] mds_regular_handle+0x15/0x20 [mdt]
[61845.140681]  [&amp;lt;ffffffffa12a66c8&amp;gt;] ptlrpc_server_handle_request+0x3a8/0xc70 [ptlrpc]
[61845.141158]  [&amp;lt;ffffffffa0af35ee&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
[61845.141448]  [&amp;lt;ffffffffa0b04e9f&amp;gt;] ? lc_watchdog_touch+0x6f/0x170 [libcfs]
[61845.142006]  [&amp;lt;ffffffffa129de11&amp;gt;] ? ptlrpc_wait_event+0xb1/0x2a0 [ptlrpc]
[61845.142386]  [&amp;lt;ffffffff81054613&amp;gt;] ? __wake_up+0x53/0x70
[61845.143683]  [&amp;lt;ffffffffa12a79d5&amp;gt;] ptlrpc_main+0xa45/0x1650 [ptlrpc]
[61845.143981]  [&amp;lt;ffffffffa12a6f90&amp;gt;] ? ptlrpc_main+0x0/0x1650 [ptlrpc]
[61845.144265]  [&amp;lt;ffffffff8100c10a&amp;gt;] child_rip+0xa/0x20
[61845.144542]  [&amp;lt;ffffffffa12a6f90&amp;gt;] ? ptlrpc_main+0x0/0x1650 [ptlrpc]
[61845.144854]  [&amp;lt;ffffffffa12a6f90&amp;gt;] ? ptlrpc_main+0x0/0x1650 [ptlrpc]
[61845.145134]  [&amp;lt;ffffffff8100c100&amp;gt;] ? child_rip+0x0/0x20
[61845.145399] 
[61845.220106] Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Crashdump and modules are in /exports/crashdumps/192.168.10.210-2013-05-09-16\:06\:54/&lt;br/&gt;
source tag master-20130509&lt;/p&gt;</description>
                <environment></environment>
        <key id="18784">LU-3306</key>
            <summary>layout.c:1946:__req_capsule_get()) @@@ Wrong  buffer for field `name&apos; (6 of 8) in format `LDLM_INTENT_OPEN&apos;: 3 vs. 0 (client)</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="green">Oleg Drokin</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                    </labels>
                <created>Thu, 9 May 2013 22:50:16 +0000</created>
                <updated>Wed, 16 Oct 2013 01:07:11 +0000</updated>
                            <resolved>Wed, 16 Oct 2013 01:07:11 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                    <fixVersion>Lustre 2.5.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="58241" author="green" created="Sun, 12 May 2013 03:26:36 +0000"  >&lt;p&gt;Had another identical crash happen, so I guess this is more than just some sort of random memory corruption.&lt;br/&gt;
Same codebase, crash in /exports/crashdumps/192.168.10.220-2013-05-11-01\:46\:08/&lt;/p&gt;</comment>
                            <comment id="58287" author="green" created="Mon, 13 May 2013 16:21:56 +0000"  >&lt;p&gt;and another one, /exports/crashdumps/192.168.10.220-2013-05-13-06\:05\:21/&lt;/p&gt;</comment>
                            <comment id="58823" author="green" created="Sat, 18 May 2013 01:25:45 +0000"  >&lt;p&gt;Well, I am hitting this pretty regularly again and again, so I did some digging and here&apos;s what I uncovered:&lt;br/&gt;
on a client in the log (for failed request):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000080:00200000:3.0:1368694813.256766:0:21370:0:(dcache.c:401:ll_revalidate_it()) VFS Op:name=13,intent=open
...
00000002:00010000:3.0:1368694813.266750:0:21370:0:(mdc_locks.c:1083:mdc_intent_lock()) (name: 13,[0x200000401:0x330:0x0]) in obj [0x200000400:0x1:0x0], intent: open flags 02304001
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I found op_data for this request and it&apos;s:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; p *(struct md_op_data *)0xffff88000b5d7df0
$3 = {
  op_fid1 = {
    f_seq = 8589935616, 
    f_oid = 1, 
    f_ver = 0
  }, 
  op_fid2 = {
    f_seq = 8589935617, 
    f_oid = 816, 
    f_ver = 0
  }, 
...
  op_name = 0xffff8800b5076fd0 &quot;2&quot;,    &amp;lt;====
  op_namelen = 2,                      &amp;lt;==== !!
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;So op_data has different name than what was reported in the log, but the length is the same.&lt;br/&gt;
What really happened, I think, is that some sort of rename succeeded in between us getting the revalidate request from the kernel and sending the message over the wire, and that rename replaced the name in the dentry (I also found the dentry).&lt;br/&gt;
In fact here it is:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000080:00200000:1.0:1368694813.261175:0:21387:0:(namei.c:1306:ll_rename_generic()) VFS Op:oldname=13,src_dir=144115205255725057/33554436(ffff880048d9cb08),newname=2,tgt_dir=144115205255725057/33554436(ffff880048d9cb08)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This is all thanks to us not storing the name in op_data, but merely a pointer to some other location (dentry in this case, but e.g. in 3270 there is a bug of similar nature where op_data points to sai entry that gets freed unexpectedly).&lt;/p&gt;

&lt;p&gt;So after sending this malformed request, on the server side our string check fails (can&apos;t unpack short string) because we expect string length 2, but got only 1; this in turn returns an error to mdt_reint_internal:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;        rc = mdt_reint_unpack(info, op);
        if (rc != 0) {
                CERROR(&quot;Can&apos;t unpack reint, rc %d\n&quot;, rc);
                RETURN(err_serious(rc));
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;That sets err_serious on the rc.&lt;/p&gt;

&lt;p&gt;Now we return all the way through the stack to mdt_enqueue, and there:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;        return rc ? err_serious(rc) : req-&amp;gt;rq_status;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;So we call err_serious(rc) AGAIN which triggers the assertion.&lt;/p&gt;

&lt;p&gt;I think we should just remove the assertion and allow double err_serious setting, as otherwise we will need to go through all the callchains to ensure it is never called twice, yet everywhere where it&apos;s needed it should be called once for sure, which might be quite cumbersome.&lt;/p&gt;</comment>
                            <comment id="58825" author="green" created="Sat, 18 May 2013 03:21:41 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/6383&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/6383&lt;/a&gt; is the err_serious change, which I think is the most important. Applying it prevents the server from crashing on data from the network.&lt;/p&gt;</comment>
                            <comment id="59798" author="jlevi" created="Fri, 31 May 2013 20:57:03 +0000"  >&lt;p&gt;Are more patches needed beyond Change 6383, which has landed to master? Or can this ticket be closed?&lt;/p&gt;</comment>
                            <comment id="69056" author="jlevi" created="Wed, 16 Oct 2013 01:07:11 +0000"  >&lt;p&gt;Closing ticket as patch landed to Master. Please let me know if more work is needed in this ticket and I will reopen.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvqlb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>8192</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>