<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:28:53 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
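
A full request might look like the following (the exact XML-view path shown here is an
assumption and may differ between JIRA versions and configurations):
https://jira.whamcloud.com/si/jira.issueviews:issue-xml/LU-2867/LU-2867.xml?field=key&field=summary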
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2867] 2.1.4&lt;-&gt;2.4.0 interop: parallel-scale test_compilebench: IOError: [Errno 71] Protocol error</title>
                <link>https://jira.whamcloud.com/browse/LU-2867</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;The parallel-scale test compilebench failed as follows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;IOError: [Errno 71] Protocol error
 parallel-scale test_compilebench: @@@@@@ FAIL: compilebench failed: 1 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The console log on the client node client-19vm1 showed the following:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;16:48:59:Lustre: DEBUG MARKER: ./compilebench -D /mnt/lustre/d0.compilebench -i 2 -r 2 --makej
16:54:22:LustreError: 23937:0:(pack_generic.c:413:lustre_msg_buf_v2()) msg ffff880042c05cc0 buffer[2] size 0 too small (required 40, opc=101)
16:54:22:LustreError: 23937:0:(layout.c:1659:__req_capsule_get()) @@@ Wrong buffer for field `dlm_lvb&apos; (2 of 3) in format `LDLM_ENQUEUE_LVB&apos;: 0 vs. 40 (server)
16:54:22:  req@ffff880042c05800 x1427708753202028/t0(0) o101-&amp;gt;lustre-OST0001-osc-ffff880075eeb000@10.10.2.223@tcp:28/4 lens 296/312 e 0 to 0 dl 1361581007 ref 1 fl Interpret:R/0/0 rc 0/0
16:54:22:LustreError: 23937:0:(pack_generic.c:413:lustre_msg_buf_v2()) msg ffff880042c05cc0 buffer[2] size 0 too small (required 40, opc=101)
16:54:22:LustreError: 23937:0:(pack_generic.c:413:lustre_msg_buf_v2()) Skipped 1 previous similar message
16:54:22:LustreError: 23937:0:(layout.c:1659:__req_capsule_get()) @@@ Wrong buffer for field `dlm_lvb&apos; (2 of 3) in format `LDLM_ENQUEUE_LVB&apos;: 0 vs. 40 (server)
16:54:22:  req@ffff880042c05800 x1427708753202030/t0(0) o101-&amp;gt;lustre-OST0001-osc-ffff880075eeb000@10.10.2.223@tcp:28/4 lens 296/312 e 0 to 0 dl 1361581023 ref 1 fl Interpret:R/0/0 rc 0/0
16:54:22:LustreError: 23937:0:(layout.c:1659:__req_capsule_get()) Skipped 1 previous similar message
16:54:22:Lustre: DEBUG MARKER: /usr/sbin/lctl mark  parallel-scale test_compilebench: @@@@@@ FAIL: compilebench failed: 1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo report: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/2bef1a90-7d79-11e2-85d0-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/2bef1a90-7d79-11e2-85d0-52540035b04c&lt;/a&gt;&lt;/p&gt;</description>
                <environment>&lt;br/&gt;
Lustre b2_1 client build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b2_1/176&quot;&gt;http://build.whamcloud.com/job/lustre-b2_1/176&lt;/a&gt;&lt;br/&gt;
Lustre master server build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-master/1269&quot;&gt;http://build.whamcloud.com/job/lustre-master/1269&lt;/a&gt;&lt;br/&gt;
Distro/Arch: RHEL6.3/x86_64&lt;br/&gt;
</environment>
        <key id="17698">LU-2867</key>
            <summary>2.1.4&lt;-&gt;2.4.0 interop: parallel-scale test_compilebench: IOError: [Errno 71] Protocol error</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="yong.fan">nasf</assignee>
                                    <reporter username="yujian">Jian Yu</reporter>
                        <labels>
                            <label>HB</label>
                    </labels>
                <created>Tue, 26 Feb 2013 01:24:58 +0000</created>
                <updated>Thu, 7 Mar 2013 06:55:11 +0000</updated>
                            <resolved>Thu, 7 Mar 2013 02:23:11 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                    <version>Lustre 2.1.4</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="53080" author="yujian" created="Wed, 27 Feb 2013 02:09:45 +0000"  >&lt;p&gt;Hi Nasf,&lt;/p&gt;

&lt;p&gt;Is this a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2645&quot; title=&quot;1.8&amp;lt;-&amp;gt;2.4 interop: enqueue objid 0x2 subobj 0x1 on OST idx 0: rc -5&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2645&quot;&gt;&lt;del&gt;LU-2645&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;</comment>
                            <comment id="53136" author="yong.fan" created="Wed, 27 Feb 2013 18:29:17 +0000"  >&lt;p&gt;I do not think so. &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2645&quot; title=&quot;1.8&amp;lt;-&amp;gt;2.4 interop: enqueue objid 0x2 subobj 0x1 on OST idx 0: rc -5&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2645&quot;&gt;&lt;del&gt;LU-2645&lt;/del&gt;&lt;/a&gt; is the convert case, server give larger size than required. I need more investigation for this one.&lt;/p&gt;</comment>
                            <comment id="53184" author="bobijam" created="Thu, 28 Feb 2013 13:09:56 +0000"  >&lt;p&gt;I think it must has something to do with variable sized LVB support (&lt;a href=&quot;http://review.whamcloud.com/3965&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/3965&lt;/a&gt;) which on the server side mdt_intent_policy() set the DLM_LVB field of server buf with 0 size&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeHeader panelHeader&quot; style=&quot;border-bottom-width: 1px;&quot;&gt;&lt;b&gt;mdt_intent_policy()&lt;/b&gt;&lt;/div&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
        } &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt; {
                &lt;span class=&quot;code-comment&quot;&gt;/* No intent was provided */&lt;/span&gt;
                LASSERT(pill-&amp;gt;rc_fmt == &amp;amp;RQF_LDLM_ENQUEUE);
                req_capsule_set_size(pill, &amp;amp;RMF_DLM_LVB, RCL_SERVER, 0);
                rc = req_capsule_server_pack(pill);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="53436" author="yong.fan" created="Wed, 6 Mar 2013 10:01:54 +0000"  >&lt;p&gt;Bobijam, I do not think it is the code section you mentioned above caused the failure. Because the failed lock was a EXT lock, not an IBITS lock. In fact, the directly reason for the failure was the OST out of memory.&lt;/p&gt;

&lt;p&gt;The log on the client:&lt;br/&gt;
====================&lt;br/&gt;
00000020:00010000:0.0:1361580860.953069:0:15378:0:(cl_lock.c:143:cl_lock_trace0()) enqueue lock: ffff880022365778@(2 ffff880078e1d500 1 0 0 1 1 0)(ffff8800457fa148/1/0) at cl_enqueue_try():1193&lt;br/&gt;
00010000:00010000:0.0:1361580860.953083:0:15378:0:(ldlm_lock.c:687:ldlm_lock_addref_internal_nolock()) ### ldlm_lock_addref(PR) ns: lustre-OST0001-osc-ffff880075eeb000 lock: ffff88007a0ced80/0xde77b732306ea406 lrc: 3/1,0 mode: --/PR res: 679272/0 rrc: 1 type: EXT [0-&amp;gt;0] (req 0-&amp;gt;0) flags: 0x0 remote: 0x0 expref: -99 pid: 15378 timeout 0&lt;br/&gt;
00010000:00010000:0.0:1361580860.953087:0:15378:0:(ldlm_request.c:835:ldlm_cli_enqueue()) ### client-side enqueue START, flags 1000&lt;br/&gt;
 ns: lustre-OST0001-osc-ffff880075eeb000 lock: ffff88007a0ced80/0xde77b732306ea406 lrc: 3/1,0 mode: --/PR res: 679272/0 rrc: 1 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x0 remote: 0x0 expref: -99 pid: 15378 timeout 0&lt;br/&gt;
00000020:00010000:0.0:1361580860.953095:0:15378:0:(cl_lock.c:143:cl_lock_trace0()) wait lock try: ffff880022365778@(3 ffff880078e1d500 1 2 0 1 1 0)(ffff8800457fa148/1/0) at cl_wait_try():1453&lt;br/&gt;
00000020:00010000:0.0:1361580860.953098:0:15378:0:(cl_lock.c:143:cl_lock_trace0()) disclosure lock: ffff880022365938@(3 ffff880078e1d500 2 1 0 1 1 0)(ffff8800457fa878/0/3) at cl_lock_disclosure():1718&lt;br/&gt;
00000020:00010000:0.0:1361580860.953101:0:15378:0:(cl_lock.c:143:cl_lock_trace0()) state wait lock: ffff880022365938@(2 ffff880078e1d500 1 1 0 1 1 0)(ffff8800457fa878/0/1) at cl_lock_state_wait():961&lt;br/&gt;
00000100:00100000:0.0:1361580860.953109:0:23937:0:(client.c:1434:ptlrpc_send_new_req()) Sending RPC pname:cluuid:pid:xid:nid:opc ptlrpcd:899b23b2-359c-4238-1ba8-ff7de7e0c945:23937:1427708753202028:10.10.2.223@tcp:101&lt;br/&gt;
00010000:00010000:0.0:1361580861.005537:0:23937:0:(ldlm_lock.c:687:ldlm_lock_addref_internal_nolock()) ### ldlm_lock_addref(PR) ns: lustre-OST0001-osc-ffff880075eeb000 lock: ffff88007a0ced80/0xde77b732306ea406 lrc: 6/2,0 mode: --/PR res: 679272/0 rrc: 1 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x0 remote: 0x0 expref: -99 pid: 15378 timeout 0&lt;br/&gt;
00010000:00010000:0.0:1361580861.005549:0:23937:0:(ldlm_request.c:599:ldlm_cli_enqueue_fini()) ### client-side enqueue, new policy data ns: lustre-OST0001-osc-ffff880075eeb000 lock: ffff88007a0ced80/0xde77b732306ea406 lrc: 6/2,0 mode: --/PR res: 679272/0 rrc: 1 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x0 remote: 0xaa3ee0fd27648cf2 expref: -99 pid: 15378 timeout 0&lt;br/&gt;
00000100:00020000:0.0:1361580861.005553:0:23937:0:(pack_generic.c:413:lustre_msg_buf_v2()) msg ffff880042c05cc0 buffer[2] size 0 too small (required 40, opc=101)&lt;br/&gt;
00000100:00020000:0.0:1361580861.006865:0:23937:0:(layout.c:1659:__req_capsule_get()) @@@ Wrong buffer for field `dlm_lvb&apos; (2 of 3) in format `LDLM_ENQUEUE_LVB&apos;: 0 vs. 40 (server)&lt;br/&gt;
====================&lt;/p&gt;

&lt;p&gt;Searching for the failed lock &quot;0xde77b732306ea406&quot; in the OST log, we find:&lt;br/&gt;
====================&lt;br/&gt;
==&amp;gt; 00010000:00020000:0.0:1361580860.999665:0:1514:0:(ldlm_resource.c:1161:ldlm_resource_get()) lvbo_init failed for resource 679272: rc -12&lt;br/&gt;
00010000:00010000:0.0:1361580861.001913:0:1514:0:(ldlm_lockd.c:1282:ldlm_handle_enqueue0()) ### server-side enqueue handler, new lock created ns: filter-ffff88001721d000 lock: ffff880060a66000/0xaa3ee0fd27648cf2 lrc: 2/0,0 mode: --/PR res: 679272/0 rrc: 1 type: EXT [0-&amp;gt;0] (req 0-&amp;gt;0) flags: 0x0 nid: local remote: 0xde77b732306ea406 expref: -99 pid: 1514 timeout: 0 lvb_type: 0&lt;br/&gt;
00010000:00010000:0.0:1361580861.001930:0:1514:0:(ldlm_extent.c:314:ldlm_check_contention()) contended locks = 0&lt;br/&gt;
00010000:00010000:0.0:1361580861.001931:0:1514:0:(ldlm_extent.c:314:ldlm_check_contention()) contended locks = 0&lt;br/&gt;
00010000:00010000:0.0:1361580861.001936:0:1514:0:(ldlm_resource.c:1270:ldlm_resource_add_lock()) ### About to add this lock:&lt;br/&gt;
 ns: filter-ffff88001721d000 lock: ffff880060a66000/0xaa3ee0fd27648cf2 lrc: 3/0,0 mode: PR/PR res: 679272/0 rrc: 1 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x0 nid: 10.10.4.220@tcp remote: 0xde77b732306ea406 expref: 4042 pid: 1514 timeout: 0 lvb_type: 1&lt;br/&gt;
00010000:00010000:0.0:1361580861.001945:0:1514:0:(ldlm_lockd.c:1414:ldlm_handle_enqueue0()) ### server-side enqueue handler, sending reply(err=0, rc=0) ns: filter-ffff88001721d000 lock: ffff880060a66000/0xaa3ee0fd27648cf2 lrc: 3/0,0 mode: PR/PR res: 679272/0 rrc: 1 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x0 nid: 10.10.4.220@tcp remote: 0xde77b732306ea406 expref: 4042 pid: 1514 timeout: 0 lvb_type: 1&lt;br/&gt;
00010000:00010000:0.0:1361580861.001952:0:1514:0:(ldlm_lockd.c:1447:ldlm_handle_enqueue0()) ### server-side enqueue handler END (lock ffff880060a66000, rc 0)&lt;br/&gt;
====================&lt;/p&gt;

&lt;p&gt;As you can see, ldlm_resource_get() failed with -ENOMEM: ldlm_resource_get() ==&amp;gt; ofd_lvbo_init() ==&amp;gt; OBD_ALLOC_PTR(lvb). That means the &quot;lvb&quot; on the lock resource was NULL because there was not enough memory at that moment. But ldlm_handle_enqueue0() ignored the &quot;lvb&quot; error and went ahead; later it packed a zero-sized &quot;lvb&quot; into the reply. So it was NOT a variable-sized LVB issue, but a memory issue.&lt;/p&gt;</comment>
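<!--
The preceding comment traces the failure to an -ENOMEM in the lvbo_init path:
the resource LVB stays NULL, ldlm_handle_enqueue0() only logs the error and
continues, the reply carries a zero-sized LVB, and the 2.1.4 client rejects it
("size 0 too small (required 40)", surfacing as errno 71). Below is a minimal
userspace C sketch of that failure mode, not the actual Lustre code; every name
in it (lvbo_init, server_enqueue, oom_injected, the simplified ost_lvb layout)
is an illustrative stand-in.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* 5 x 8 bytes = the 40 bytes the client log complains about. */
struct ost_lvb {
        uint64_t lvb_size, lvb_mtime, lvb_atime, lvb_ctime, lvb_blocks;
};

struct ldlm_resource {
        struct ost_lvb *lr_lvb_data;   /* stays NULL when allocation fails */
        int             lr_lvb_len;    /* stays 0 when allocation fails */
};

static int oom_injected = 1;           /* simulate the OST running out of memory */

/* Stand-in for the ofd_lvbo_init() step: allocate the per-resource LVB. */
static int lvbo_init(struct ldlm_resource *res)
{
        if (oom_injected)
                return -ENOMEM;        /* "lvbo_init failed for resource ...: rc -12" */
        res->lr_lvb_data = calloc(1, sizeof(*res->lr_lvb_data));
        if (res->lr_lvb_data == NULL)
                return -ENOMEM;
        res->lr_lvb_len = (int)sizeof(*res->lr_lvb_data);
        return 0;
}

/* Stand-in for ldlm_handle_enqueue0(): the init error is only logged. */
static int server_enqueue(struct ldlm_resource *res, int *reply_lvb_len)
{
        int rc = lvbo_init(res);

        if (rc != 0)
                fprintf(stderr, "lvbo_init failed: rc %d (ignored)\n", rc);
        *reply_lvb_len = res->lr_lvb_len;      /* a 0-byte LVB goes into the reply */
        return 0;                              /* the enqueue still "succeeds" */
}

int main(void)
{
        struct ldlm_resource res = { NULL, 0 };
        int reply_lvb_len = 0;

        server_enqueue(&res, &reply_lvb_len);

        /* Client side: the LDLM_ENQUEUE_LVB reply format requires a full ost_lvb. */
        if (reply_lvb_len < (int)sizeof(struct ost_lvb)) {
                fprintf(stderr, "buffer size %d too small (required %zu)\n",
                        reply_lvb_len, sizeof(struct ost_lvb));
                return EPROTO;         /* surfaces to userspace as errno 71 */
        }
        return 0;
}
-->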
                            <comment id="53511" author="yong.fan" created="Thu, 7 Mar 2013 02:22:20 +0000"  >&lt;p&gt;Cannot allocate &quot;lvb&quot; for ext lock&lt;/p&gt;</comment>
                            <comment id="53512" author="yong.fan" created="Thu, 7 Mar 2013 02:23:11 +0000"  >&lt;p&gt;It is a duplication of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2790&quot; title=&quot;Failure to allocated osd keys leads to ofd_intent_policy()) ASSERTION( res_lvb != ((void *)0) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2790&quot;&gt;&lt;del&gt;LU-2790&lt;/del&gt;&lt;/a&gt; for failed to allocate &quot;lvb&quot; for ext lock.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="17521">LU-2790</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvjtj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6933</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>