<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:07:53 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-524] Test failure on test suite sanityn, subtest test_40c</title>
                <link>https://jira.whamcloud.com/browse/LU-524</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Chris Gearing &amp;lt;chris@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/69d07264-b345-11e0-b33f-52540025f9af&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/69d07264-b345-11e0-b33f-52540025f9af&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_40c failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;link is blocked&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;This error is occurring regularly on many tests and preventing successful completion.&lt;/p&gt;</description>
                <environment></environment>
        <key id="11371">LU-524</key>
            <summary>Test failure on test suite sanityn, subtest test_40c</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="2">Won&apos;t Fix</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Thu, 21 Jul 2011 04:21:13 +0000</created>
                <updated>Mon, 25 Jul 2011 12:23:37 +0000</updated>
                            <resolved>Mon, 25 Jul 2011 12:22:30 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="18040" author="yong.fan" created="Thu, 21 Jul 2011 08:11:09 +0000"  >&lt;p&gt;It is very strange issue, the failure reason is that:&lt;/p&gt;

&lt;p&gt;on MDS side:&lt;br/&gt;
===================&lt;br/&gt;
00000100:00100000:0.0:1311197986.353819:0:3846:0:(service.c:1705:ptlrpc_server_handle_request()) Handling RPC pname:cluuid+ref:pid:xid:nid:opc mdt_00:77dcbafc-6af0-ecc4-18e6-bd80fa9cb83b+11:23254:x1374890243444654:12345-10.10.4.100@tcp:36&lt;br/&gt;
00010000:00010000:0.0:1311197986.353833:0:3846:0:(ldlm_lockd.c:1412:ldlm_request_cancel()) ### server-side cancel handler START: 1 locks, starting at 0&lt;br/&gt;
00010000:00010000:0.0:1311197986.353836:0:3846:0:(ldlm_lockd.c:511:ldlm_del_waiting_lock()) ### wasn&apos;t waiting ns: mdt-ffff81000af3b800 lock: ffff810079199b40/0x5897b4e9bb6dc7d6 lrc: 3/0,0 mode: PR/PR res: 8589941617/10041 bits 0x3 rrc: 2 type: IBT flags: 0x4000000 remote: 0x21fb2945a70a2a86 expref: 11 pid: 3846 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197986.353855:0:3846:0:(ldlm_lockd.c:511:ldlm_del_waiting_lock()) ### wasn&apos;t waiting ns: mdt-ffff81000af3b800 lock: ffff810079199b40/0x5897b4e9bb6dc7d6 lrc: 3/0,0 mode: PR/PR res: 8589941617/10041 bits 0x3 rrc: 2 type: IBT flags: 0x24000080 remote: 0x21fb2945a70a2a86 expref: 11 pid: 3846 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197986.353862:0:3846:0:(ldlm_lock.c:195:ldlm_lock_put()) ### final lock_put on destroyed lock, freeing it. ns: mdt-ffff81000af3b800 lock: ffff810079199b40/0x5897b4e9bb6dc7d6 lrc: 0/0,0 mode: --/PR res: 8589941617/10041 bits 0x3 rrc: 2 type: IBT flags: 0x24000080 remote: 0x21fb2945a70a2a86 expref: 11 pid: 3846 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197986.353868:0:3846:0:(ldlm_lockd.c:1447:ldlm_request_cancel()) ### server-side cancel handler END&lt;br/&gt;
00010000:00010000:0.0:1311197986.353874:0:3846:0:(ldlm_lock.c:668:ldlm_lock_addref_internal_nolock()) ### ldlm_lock_addref(CW) ns: mdt-ffff81000af3b800 lock: ffff81004fb9fd80/0x5897b4e9bb6dc7dd lrc: 3/0,1 mode: --/CW res: 3660161/2491890387 bits 0x0 rrc: 6 type: IBT flags: 0x0 remote: 0x0 expref: -99 pid: 3846 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197986.353880:0:3846:0:(ldlm_lock.c:607:ldlm_add_bl_work_item()) ### lock incompatible; sending blocking AST. ns: mdt-ffff81000af3b800 lock: ffff810055893900/0x5897b4e9bb6dc7ba lrc: 3/1,0 mode: --/PR res: 3660161/2491890387 bits 0x3 rrc: 6 type: IBT flags: 0x4004000 remote: 0x0 expref: -99 pid: 4096 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197986.353886:0:3846:0:(ldlm_request.c:309:ldlm_blocking_ast_nocheck()) ### Lock still has references, will be cancelled later ns: mdt-ffff81000af3b800 lock: ffff810055893900/0x5897b4e9bb6dc7ba lrc: 4/1,0 mode: --/PR res: 3660161/2491890387 bits 0x3 rrc: 6 type: IBT flags: 0x4004030 remote: 0x0 expref: -99 pid: 4096 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197986.353892:0:3846:0:(ldlm_request.c:224:ldlm_completion_ast()) ### client-side enqueue returned a blocked lock, sleeping ns: mdt-ffff81000af3b800 lock: ffff81004fb9fd80/0x5897b4e9bb6dc7dd lrc: 3/0,1 mode: --/CW res: 3660161/2491890387 bits 0x2 rrc: 6 type: IBT flags: 0x4004000 remote: 0x0 expref: -99 pid: 3846 timeout: 0&lt;br/&gt;
===================&lt;/p&gt;

&lt;p&gt;So the second link operation in sanityn test_40c (mdt_00) was blocked by the lock &quot;ffff810055893900/0x5897b4e9bb6dc7ba&quot;. Such lock was created by another mdt thread: mdt_02 (4096)&lt;/p&gt;

&lt;p&gt;===================&lt;br/&gt;
00000100:00100000:0.0:1311197985.450701:0:4096:0:(service.c:1705:ptlrpc_server_handle_request()) Handling RPC pname:cluuid+ref:pid:xid:nid:opc mdt_02:4ba32714-4664-4650-51ba-87e8d8be7d5f+11:23245:x1374890243444652:12345-10.10.4.100@tcp:101&lt;br/&gt;
00010000:00010000:0.0:1311197985.450711:0:4096:0:(ldlm_lockd.c:1048:ldlm_handle_enqueue0()) ### server-side enqueue handler START&lt;br/&gt;
00010000:00010000:0.0:1311197985.450722:0:4096:0:(ldlm_lockd.c:1133:ldlm_handle_enqueue0()) ### server-side enqueue handler, new lock created ns: mdt-ffff81000af3b800 lock: ffff81004fb9fb40/0x5897b4e9bb6dc7b3 lrc: 2/0,0 mode: --/CR res: 3660161/2491890387 bits 0x0 rrc: 4 type: IBT flags: 0x0 remote: 0x21fb2945a70a2a7f expref: -99 pid: 4096 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197985.450765:0:4096:0:(ldlm_lock.c:668:ldlm_lock_addref_internal_nolock()) ### ldlm_lock_addref(PR) ns: mdt-ffff81000af3b800 lock: ffff810055893900/0x5897b4e9bb6dc7ba lrc: 3/1,0 mode: --/PR res: 3660161/2491890387 bits 0x0 rrc: 5 type: IBT flags: 0x0 remote: 0x0 expref: -99 pid: 4096 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197985.450772:0:4096:0:(ldlm_lock.c:607:ldlm_add_bl_work_item()) ### lock incompatible; sending blocking AST. ns: mdt-ffff81000af3b800 lock: ffff8100502da900/0x5897b4e9bb6dc774 lrc: 2/0,1 mode: CW/CW res: 3660161/2491890387 bits 0x2 rrc: 5 type: IBT flags: 0x4004000 remote: 0x0 expref: -99 pid: 3847 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197985.450780:0:4096:0:(ldlm_request.c:309:ldlm_blocking_ast_nocheck()) ### Lock still has references, will be cancelled later ns: mdt-ffff81000af3b800 lock: ffff8100502da900/0x5897b4e9bb6dc774 lrc: 3/0,1 mode: CW/CW res: 3660161/2491890387 bits 0x2 rrc: 5 type: IBT flags: 0x4004030 remote: 0x0 expref: -99 pid: 3847 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197985.450800:0:4096:0:(ldlm_request.c:224:ldlm_completion_ast()) ### client-side enqueue returned a blocked lock, sleeping ns: mdt-ffff81000af3b800 lock: ffff810055893900/0x5897b4e9bb6dc7ba lrc: 3/1,0 mode: --/PR res: 3660161/2491890387 bits 0x3 rrc: 5 type: IBT flags: 0x4004000 remote: 0x0 expref: -99 pid: 4096 timeout: 0&lt;br/&gt;
===================&lt;/p&gt;

&lt;p&gt;Means mdt_02 created the lock &quot;ffff810055893900/0x5897b4e9bb6dc7ba&quot;, but it was blocked by another lock &quot;ffff8100502da900/0x5897b4e9bb6dc774&quot;, the later lock was held by mdt_01 (3847), which was just the thread was processing the first link operation and slept for &quot;OBD_FAIL_MDS_PDO_LOCK&quot;.&lt;/p&gt;

&lt;p&gt;Then back to study the client-side log, the lock &quot;ffff810055893900/0x5897b4e9bb6dc7ba&quot; was triggered by the RPC of &quot;x1374890243444652&quot;&lt;/p&gt;

&lt;p&gt;===================&lt;br/&gt;
00000080:00200000:0.0:1311197985.451290:0:23245:0:(file.c:2190:__ll_inode_revalidate_it()) VFS Op:inode=3660161/2491890387(ffff81006ae87b50),name=/&lt;br/&gt;
00000002:00010000:0.0:1311197985.451306:0:23245:0:(mdc_locks.c:917:mdc_intent_lock()) (name: ,&lt;span class=&quot;error&quot;&gt;&amp;#91;0x37d981:0x94873ad3:0x0&amp;#93;&lt;/span&gt;) in obj &lt;span class=&quot;error&quot;&gt;&amp;#91;0x37d981:0x94873ad3:0x0&amp;#93;&lt;/span&gt;, intent: getattr flags 00&lt;br/&gt;
00010000:00010000:0.0:1311197985.451316:0:23245:0:(ldlm_lock.c:1208:ldlm_lock_match()) ### not matched ns ffff81006a88e000 type 13 mode 30 res 3660161/2491890387 (0 0)&lt;br/&gt;
00010000:00010000:0.0:1311197985.451360:0:23245:0:(ldlm_lock.c:668:ldlm_lock_addref_internal_nolock()) ### ldlm_lock_addref(CR) ns: lustre-MDT0000-mdc-ffff81006b0dcc00 lock: ffff81006ad28000/0x21fb2945a70a2a7f lrc: 3/1,0 mode: --/CR res: 3660161/2491890387 bits 0x0 rrc: 2 type: IBT flags: 0x0 remote: 0x0 expref: -99 pid: 23245 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197985.451366:0:23245:0:(ldlm_request.c:830:ldlm_cli_enqueue()) ### client-side enqueue START ns: lustre-MDT0000-mdc-ffff81006b0dcc00 lock: ffff81006ad28000/0x21fb2945a70a2a7f lrc: 3/1,0 mode: --/CR res: 3660161/2491890387 bits 0x2 rrc: 2 type: IBT flags: 0x0 remote: 0x0 expref: -99 pid: 23245 timeout: 0&lt;br/&gt;
00010000:00010000:0.0:1311197985.451371:0:23245:0:(ldlm_request.c:892:ldlm_cli_enqueue()) ### sending request ns: lustre-MDT0000-mdc-ffff81006b0dcc00 lock: ffff81006ad28000/0x21fb2945a70a2a7f lrc: 3/1,0 mode: --/CR res: 3660161/2491890387 bits 0x2 rrc: 2 type: IBT flags: 0x0 remote: 0x0 expref: -99 pid: 23245 timeout: 0&lt;br/&gt;
00000100:00100000:0.0:1311197985.451382:0:23245:0:(client.c:1392:ptlrpc_send_new_req()) Sending RPC pname:cluuid:pid:xid:nid:opc updatedb:4ba32714-4664-4650-51ba-87e8d8be7d5f:23245:1374890243444652:10.10.4.102@tcp:101&lt;br/&gt;
===================&lt;/p&gt;

&lt;p&gt;As shown above, it was the thread 23245 that triggered the RPC &quot;x1374890243444652&quot;. Unfortunately, we do not know what the thread 23245 want to do, it was not any threads related with sanityn test_40c, it looks like a system daemon to scan the filesystem.&lt;/p&gt;</comment>
                            <comment id="18041" author="yong.fan" created="Thu, 21 Jul 2011 08:14:41 +0000"  >&lt;p&gt;The conclusion is that:&lt;/p&gt;

&lt;p&gt;the second link was blocked by some unknown ops, and the unknown ops was blocked by the first link, but if without such unknown ops, the second link would not be blocked by the first link. So we should find out what the unknown ops was for and how to remove such unknown ops.&lt;/p&gt;</comment>
                            <comment id="18052" author="pjones" created="Thu, 21 Jul 2011 12:36:53 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please look into this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="18087" author="bobijam" created="Fri, 22 Jul 2011 03:04:09 +0000"  >&lt;p&gt;the unknown process (23245) is not part of the test processes, it was reading the dir (ll_readdir), including .lustre directory.&lt;/p&gt;</comment>
                            <comment id="18193" author="pjones" created="Mon, 25 Jul 2011 12:22:30 +0000"  >&lt;p&gt;Thanks Bobi. Let&apos;s close this ticket then&lt;/p&gt;</comment>
                            <comment id="18194" author="green" created="Mon, 25 Jul 2011 12:23:37 +0000"  >&lt;p&gt;I bet the unknown process is &quot;updatedb&quot; and the test failure is invalid as the result&lt;br/&gt;
we need to update /etc/updatedb.conf to not include lustre mountpoints there&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvfe7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6139</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>