<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:39:57 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-4132] Test failure on test suite sanity, subtest test_46</title>
                <link>https://jira.whamcloud.com/browse/LU-4132</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for wangdi &amp;lt;di.wang@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;http://maloo.whamcloud.com/test_sets/557b9ede-3a38-11e3-aede-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://maloo.whamcloud.com/test_sets/557b9ede-3a38-11e3-aede-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_46 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: sanity 46&lt;/p&gt;
</description>
                <environment></environment>
        <key id="21570">LU-4132</key>
            <summary>Test failure on test suite sanity, subtest test_46</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Tue, 22 Oct 2013 06:29:01 +0000</created>
                <updated>Mon, 17 Apr 2017 21:25:25 +0000</updated>
                            <resolved>Mon, 17 Apr 2017 21:25:25 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="69497" author="di.wang" created="Tue, 22 Oct 2013 06:30:17 +0000"  >&lt;p&gt;Although this failure only happens in the DNE test, I suspect it is related to having 16 OSTs.  Usually we use 8 OSTs in a single MDT run, but 16 OSTs (4 MDTs) in a DNE run.&lt;/p&gt;</comment>
                            <comment id="69544" author="green" created="Tue, 22 Oct 2013 17:36:46 +0000"  >&lt;p&gt;the client is stuck waiting on lock enqueue as part of a sync (trying to refresh a layout lock?):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;flush-lustre- S 0000000000000000     0 10803      2 0x00000080
 ffff88005328b4a0 0000000000000046 0000000000000000 0a64657265746e65
 0000000000000000 0000000000000000 ffff88005571e9c0 ffff88005571e9f0
 ffff88007c13fab8 ffff88005328bfd8 000000000000fb88 ffff88007c13fab8
Call Trace:
 [&amp;lt;ffffffffa03e77b1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffff8150ef22&amp;gt;] schedule_timeout+0x192/0x2e0
 [&amp;lt;ffffffff810811e0&amp;gt;] ? process_timeout+0x0/0x10
 [&amp;lt;ffffffffa06c295a&amp;gt;] ptlrpc_set_wait+0x2da/0x860 [ptlrpc]
 [&amp;lt;ffffffff81063410&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa06cc056&amp;gt;] ? lustre_msg_set_jobid+0xb6/0x140 [ptlrpc]
 [&amp;lt;ffffffffa06c2f67&amp;gt;] ptlrpc_queue_wait+0x87/0x220 [ptlrpc]
 [&amp;lt;ffffffffa06a5855&amp;gt;] ldlm_cli_enqueue+0x365/0x790 [ptlrpc]
 [&amp;lt;ffffffffa06aa520&amp;gt;] ? ldlm_completion_ast+0x0/0x920 [ptlrpc]
 [&amp;lt;ffffffffa0a81740&amp;gt;] ? ll_md_blocking_ast+0x0/0x790 [lustre]
 [&amp;lt;ffffffffa08a9b0e&amp;gt;] mdc_enqueue+0x2be/0x1a10 [mdc]
 [&amp;lt;ffffffffa03e77b1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffffa03e77b1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffffa085db34&amp;gt;] lmv_enqueue+0x2f4/0xfc0 [lmv]
 [&amp;lt;ffffffffa0a5c155&amp;gt;] ll_layout_refresh+0x515/0xfe0 [lustre]
 [&amp;lt;ffffffffa0a81740&amp;gt;] ? ll_md_blocking_ast+0x0/0x790 [lustre]
 [&amp;lt;ffffffffa06aa520&amp;gt;] ? ldlm_completion_ast+0x0/0x920 [ptlrpc]
 [&amp;lt;ffffffffa05958b2&amp;gt;] ? cl_io_slice_add+0x132/0x190 [obdclass]
 [&amp;lt;ffffffffa0aaa7c0&amp;gt;] vvp_io_init+0x340/0x490 [lustre]
 [&amp;lt;ffffffffa03e77b1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffffa0594ae8&amp;gt;] cl_io_init0+0x98/0x160 [obdclass]
 [&amp;lt;ffffffffa0597874&amp;gt;] cl_io_init+0x64/0xe0 [obdclass]
 [&amp;lt;ffffffffa0a50f4d&amp;gt;] cl_sync_file_range+0x12d/0x500 [lustre]
 [&amp;lt;ffffffffa0a7861b&amp;gt;] ll_writepages+0x8b/0x1c0 [lustre]
 [&amp;lt;ffffffff8112e181&amp;gt;] do_writepages+0x21/0x40
 [&amp;lt;ffffffff811aca0d&amp;gt;] writeback_single_inode+0xdd/0x290
 [&amp;lt;ffffffff811ace1e&amp;gt;] writeback_sb_inodes+0xce/0x180
 [&amp;lt;ffffffff811acf7b&amp;gt;] writeback_inodes_wb+0xab/0x1b0
 [&amp;lt;ffffffff811ad31b&amp;gt;] wb_writeback+0x29b/0x3f0
 [&amp;lt;ffffffff8150e130&amp;gt;] ? thread_return+0x4e/0x76e
 [&amp;lt;ffffffff81081be2&amp;gt;] ? del_timer_sync+0x22/0x30
 [&amp;lt;ffffffff811ad52b&amp;gt;] wb_do_writeback+0xbb/0x240
 [&amp;lt;ffffffff811ad713&amp;gt;] bdi_writeback_task+0x63/0x1b0
 [&amp;lt;ffffffff81096c67&amp;gt;] ? bit_waitqueue+0x17/0xd0
 [&amp;lt;ffffffff8113cc20&amp;gt;] ? bdi_start_fn+0x0/0x100
 [&amp;lt;ffffffff8113cca6&amp;gt;] bdi_start_fn+0x86/0x100
 [&amp;lt;ffffffff8113cc20&amp;gt;] ? bdi_start_fn+0x0/0x100
 [&amp;lt;ffffffff81096a36&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff810969a0&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="69553" author="adilger" created="Tue, 22 Oct 2013 17:50:54 +0000"  >&lt;p&gt;It looks like there IS some kind of problem specific to DNE and LFSCK. In the MDT4 console log it is repeatedly reporting problems with a FID on that MDT but it seems that LFSCK is not fixing it. &lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;10:43:43:LustreError: 0-0: lustre-MDT0003: trigger OI scrub by RPC for [0x6c0000400:0xd36:0x0], rc = 0 [1]
10:43:43:LustreError: Skipped 21 previous similar messages
10:43:43:LustreError: 0-0: lustre-MDT0003: trigger OI scrub by RPC for [0x6c0000400:0xd36:0x0], rc = 0 [1]
10:43:43:LustreError: Skipped 42 previous similar messages
10:43:43:LustreError: 0-0: lustre-MDT0003: trigger OI scrub by RPC for [0x6c0000400:0xd36:0x0], rc = 0 [1]
10:43:43:LustreError: Skipped 85 previous similar messages
10:43:43:LustreError: 0-0: lustre-MDT0003: trigger OI scrub by RPC for [0x6c0000400:0xd36:0x0], rc = 0 [1]
10:43:43:LustreError: Skipped 99 previous similar messages
10:43:43:LustreError: 0-0: lustre-MDT0003: trigger OI scrub by RPC for [0x6c0000400:0xd36:0x0], rc = 0 [1]
10:43:43:LustreError: Skipped 97 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This is happening hundreds of times per second, which is the first problem. LFSCK shouldn&apos;t be restarting so quickly. &lt;/p&gt;

&lt;p&gt;Secondly, DNE shouldn&apos;t be creating filesystems during normal operation that LFSCK isn&apos;t happy about.  This would be a bug in DNE or LFSCK. &lt;/p&gt;

&lt;p&gt;Finally, whatever problem is being found by LFSCK is not being fixed by LFSCK, since it is being reported in the same FID repeatedly. &lt;/p&gt;</comment>
                            <comment id="69583" author="di.wang" created="Tue, 22 Oct 2013 21:30:37 +0000"  >&lt;p&gt;Hmm, it seems the client is trying to get a layout lock from MDT4 during sync (there are a few syncs in sanity 46), but MDT4 is somehow busy with OI scrub. I checked the debug log of MDT4:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;80000000:00000001:0.0:1381970194.705176:0:10443:0:(fld_cache.c:559:fld_cache_lookup()) Process leaving (rc=0 : 0 : 0)
80000000:00000001:0.0:1381970194.705177:0:10443:0:(fld_handler.c:138:fld_server_lookup()) Process leaving (rc=0 : 0 : 0)
00000004:00000040:0.0:1381970194.705179:0:10443:0:(lod_dev.c:88:lod_fld_lookup()) LOD: got tgt 3 for sequence: 0x6c0000400
00000004:00000001:0.0:1381970194.705181:0:10443:0:(lod_dev.c:90:lod_fld_lookup()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1381970194.705183:0:10443:0:(lod_dev.c:143:lod_object_alloc()) Process leaving (rc=18446612134391954456 : -131939317597160 : ffff88007bd7bc18)
00000004:00000001:0.0:1381970194.705185:0:10443:0:(mdd_object.c:181:mdd_object_init()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1381970194.705188:0:10443:0:(lod_object.c:1203:lod_object_init()) Process entered
00000004:00000010:0.0:1381970194.705193:0:10443:0:(osd_handler.c:172:osd_object_alloc()) kmalloced &apos;mo&apos;: 176 at ffff88006752c9c0.
00000004:00000001:0.0:1381970194.705195:0:10443:0:(lod_object.c:1215:lod_object_init()) Process leaving (rc=0 : 0 : 0)
00000004:00000001:0.0:1381970194.705198:0:10443:0:(osd_handler.c:394:osd_fid_lookup()) Process entered
00000004:00000001:0.0:1381970194.705204:0:10443:0:(osd_handler.c:327:osd_check_lma()) Process entered
00000004:00000002:0.0:1381970194.705221:0:10443:0:(osd_handler.c:371:osd_check_lma()) lustre-MDT0003: FID [0x6c0000400:0xd56:0x0] != self_fid [0x6c0000400:0xd57:0x0]
00000004:00000001:0.0:1381970194.705224:0:10443:0:(osd_handler.c:375:osd_check_lma()) Process leaving (rc=18446744073709551538 : -78 : ffffffffffffffb2)
00000004:00000001:0.0:1381970194.705228:0:10443:0:(osd_scrub.c:1991:osd_scrub_start()) Process entered
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;p&gt;It seems to be an OI cache problem (note: this object is a normal local object, see the result from lod_fld_lookup). This might also explain why LFSCK cannot fix it? (Since the on-disk LMA might be right.) Fan Yong, could you please comment? Thanks. &lt;/p&gt;</comment>
                            <comment id="69600" author="yong.fan" created="Wed, 23 Oct 2013 00:17:43 +0000"  >&lt;p&gt;It is probably caused by the OI cache. I have made a patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4106&quot; title=&quot;racer test hang&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4106&quot;&gt;&lt;del&gt;LU-4106&lt;/del&gt;&lt;/a&gt; which contains the fix for OI cache cleanup:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/8002/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/8002/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="192354" author="adilger" created="Mon, 17 Apr 2017 21:25:25 +0000"  >&lt;p&gt;Close old issue as duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4106&quot; title=&quot;racer test hang&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4106&quot;&gt;&lt;del&gt;LU-4106&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="21417">LU-4106</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzw6k7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>11189</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>