<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:28:29 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2821] osd_handler.c:2482:osd_declare_object_ref_del()) ASSERTION( dt_object_exists(dt) &amp;&amp; !dt_object_remote(dt) ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-2821</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Had this assertion failure in replay-single test 27:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[447277.695681] Lustre: DEBUG MARKER: == replay-single test 27: |X| open(O_CREAT), unlink two, replay, close two (test mds_cleanup_orphans) == 04:43:29 (1360921409)
[447278.101651] Turning device loop0 (0x700000) read-only
[447278.145558] Lustre: DEBUG MARKER: mds1 REPLAY BARRIER on lustre-MDT0000
[447278.152329] Lustre: DEBUG MARKER: local REPLAY BARRIER on lustre-MDT0000
[447278.795747] Removing read-only on unknown block (0x700000)
[447290.601469] LDISKFS-fs (loop0): recovery complete
[447290.615599] LDISKFS-fs (loop0): mounted filesystem with ordered data mode. quota=on. Opts: 
[447290.655601] LustreError: 5393:0:(ldlm_resource.c:805:ldlm_resource_complain()) Namespace MGC192.168.10.211@tcp resource refcount nonzero (1) after lock cleanup; forcing cleanup.
[447290.656331] LustreError: 5393:0:(ldlm_resource.c:805:ldlm_resource_complain()) Skipped 1 previous similar message
[447290.656915] LustreError: 5393:0:(ldlm_resource.c:811:ldlm_resource_complain()) Resource: ffff88008def0e78 (111542254400876/0/0/0) (rc: 1)
[447290.657404] LustreError: 5393:0:(ldlm_resource.c:811:ldlm_resource_complain()) Skipped 1 previous similar message
[447290.657864] LustreError: 5393:0:(ldlm_resource.c:1404:ldlm_resource_dump()) --- Resource: ffff88008def0e78 (111542254400876/0/0/0) (rc: 2)
[447292.228086] Lustre: lustre-MDT0000-mdc-ffff88005bca2bf0: Connection restored to lustre-MDT0000 (at 0@lo)
[447292.228559] Lustre: Skipped 23 previous similar messages
[447292.241230] LustreError: 5456:0:(osd_handler.c:2482:osd_declare_object_ref_del()) ASSERTION( dt_object_exists(dt) &amp;amp;&amp;amp; !dt_object_remote(dt) ) failed: 
[447292.241794] LustreError: 5456:0:(osd_handler.c:2482:osd_declare_object_ref_del()) LBUG
[447292.242247] Pid: 5456, comm: tgt_recov
[447292.242486] 
[447292.242486] Call Trace:
[447292.242862]  [&amp;lt;ffffffffa0cbd915&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
[447292.243133]  [&amp;lt;ffffffffa0cbdf17&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
[447292.243393]  [&amp;lt;ffffffffa081ee38&amp;gt;] osd_declare_object_ref_del+0x98/0x130 [osd_ldiskfs]
[447292.243831]  [&amp;lt;ffffffffa098632b&amp;gt;] lod_declare_ref_del+0x3b/0xd0 [lod]
[447292.244094]  [&amp;lt;ffffffffa075fd32&amp;gt;] orph_declare_index_delete+0x112/0x2e0 [mdd]
[447292.244523]  [&amp;lt;ffffffffa0760579&amp;gt;] __mdd_orphan_cleanup+0x679/0xca0 [mdd]
[447292.244828]  [&amp;lt;ffffffffa076f4ad&amp;gt;] mdd_recovery_complete+0xed/0x170 [mdd]
[447292.245135]  [&amp;lt;ffffffffa0894bb5&amp;gt;] mdt_postrecov+0x35/0xd0 [mdt]
[447292.245394]  [&amp;lt;ffffffffa0896638&amp;gt;] mdt_obd_postrecov+0x78/0x90 [mdt]
[447292.245703]  [&amp;lt;ffffffffa1257f90&amp;gt;] ? ldlm_reprocess_res+0x0/0x20 [ptlrpc]
[447292.246005]  [&amp;lt;ffffffffa1268b01&amp;gt;] target_recovery_thread+0xb31/0x1610 [ptlrpc]
[447292.246449]  [&amp;lt;ffffffffa1267fd0&amp;gt;] ? target_recovery_thread+0x0/0x1610 [ptlrpc]
[447292.246866]  [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
[447292.247113]  [&amp;lt;ffffffffa1267fd0&amp;gt;] ? target_recovery_thread+0x0/0x1610 [ptlrpc]
[447292.247552]  [&amp;lt;ffffffffa1267fd0&amp;gt;] ? target_recovery_thread+0x0/0x1610 [ptlrpc]
[447292.247971]  [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
[447292.248208] 
[447292.304220] Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Crashdump and modules in /exports/crashdumps/192.168.10.211-2013-02-15-04\:43\:51&lt;/p&gt;</description>
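<!--
Editor's note: a minimal C sketch of the check that fires in the trace above,
based on the function name and assertion text quoted in the log (illustrative,
not the verbatim Lustre source). osd_declare_object_ref_del() is called while
declaring a transaction that will drop an inode link count; it requires the
object to still exist on this local target. An orphan that another thread has
already destroyed violates that assumption, and a failed LASSERT() escalates
to LBUG(), which panics the node exactly as the console log shows.

static int osd_declare_object_ref_del(const struct lu_env *env,
                                      struct dt_object *dt,
                                      struct thandle *handle)
{
        /* The object whose nlink is about to drop must still exist on
         * this target and must not live on a remote (DNE) target. */
        LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));

        /* ...then reserve journal credits for the inode update... */
        return 0;
}
-->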
                <environment></environment>
        <key id="17588">LU-2821</key>
            <summary>osd_handler.c:2482:osd_declare_object_ref_del()) ASSERTION( dt_object_exists(dt) &amp;&amp; !dt_object_remote(dt) ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                    </labels>
                <created>Fri, 15 Feb 2013 12:19:19 +0000</created>
                <updated>Thu, 27 Feb 2020 23:56:10 +0000</updated>
                <resolved>Thu, 27 Feb 2020 23:56:10 +0000</resolved>
                <version>Lustre 2.4.0</version>
                <due></due>
                <votes>0</votes>
                <watches>4</watches>
                <comments>
                            <comment id="52545" author="di.wang" created="Sat, 16 Feb 2013 20:02:32 +0000"  >&lt;p&gt;According to the debug log, clearly there is a race of orphan cleanup between mdd_close and mdd_recovery_complete&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000004:00080000:3.0:1360921424.522237:0:5430:0:(mdd_object.c:1966:mdd_close()) Object [0x280001b71:0x13:0x0] is deleted from orphan list, OSS objects to be destroyed.
00000040:00100000:3.0:1360921424.522262:0:5430:0:(llog_osd.c:465:llog_osd_write_rec()) added record 0x53: idx: 2, 64
00000100:00100000:3.0:1360921424.522362:0:5430:0:(service.c:2020:ptlrpc_server_handle_request()) Handled RPC pname:cluuid+ref:pid:xid:nid:opc mdt_rdpg00_001:8af42fe9-8c1e-88fd-452d-5295dd97c2d5+7:5216:x1427028696914976:12345-0@lo:35 Request procesed in 2989us (3023us total) trans 158913789954 rc 0/0
00000100:00100000:3.0:1360921424.522366:0:5430:0:(nrs_fifo.c:245:nrs_fifo_req_stop()) NRS stop fifo request from 12345-0@lo, seq: 0
00000100:00100000:3.0:1360921424.522402:0:5216:0:(client.c:1803:ptlrpc_check_set()) Completed RPC pname:cluuid:pid:xid:nid:opc multiop:8af42fe9-8c1e-88fd-452d-5295dd97c2d5:5216:1427028696914976:0@lo:35
00000080:00200000:0.0:1360921424.522740:0:5219:0:(llite_lib.c:1416:ll_setattr_raw()) lustre: setattr inode ffff88008e1b3b08/fid:[0x280001b71:0x14:0x0] from 0 to 18446612134838689624, valid 41
00000004:00080000:2.0:1360921424.522756:0:5456:0:(mdd_orphans.c:394:orph_key_test_and_del()) Found orphan [0x280001b71:0x13:0x0], delete it
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="52928" author="tappro" created="Sat, 23 Feb 2013 14:24:47 +0000"  >&lt;p&gt;Looks like another assertion that may be not correct because of locking is not around whole transaction but execution now.&lt;/p&gt;</comment>
                            <comment id="68474" author="green" created="Mon, 7 Oct 2013 06:00:47 +0000"  >&lt;p&gt;I just hit this once more.&lt;br/&gt;
So, how about having some patches for the locking here?&lt;/p&gt;</comment>
                            <comment id="74595" author="schamp" created="Wed, 8 Jan 2014 20:14:44 +0000"  >&lt;p&gt;I have hit what appears to match this bug while running acceptance on my modified 2.4.1 with a rebuild of e2fsprogs-1.42.7.wc2.  The MDS trips while running large-scale 3a.&lt;/p&gt;

&lt;p&gt;Build Version: sgi241a18.rhel6---CHANGED-2.6.32-358.23.2.el6sgi241a18.rhel6.x86_64.lustre&lt;br/&gt;
LNet: Added LNI 192.168.21.13@o2ib1 &lt;span class=&quot;error&quot;&gt;&amp;#91;8/256/0/180&amp;#93;&lt;/span&gt;&lt;br/&gt;
LDISKFS-fs (sdc): recovery complete&lt;br/&gt;
LDISKFS-fs (sdc): mounted filesystem with ordered data mode. quota=on. Opts:&lt;br/&gt;
Lustre: accfs1-MDT0000: used disk, loading&lt;br/&gt;
LustreError: 11-0: accfs1-MDT0000-lwp-MDT0000: Communicating with 0@lo, operation mds_connect failed with -11.&lt;br/&gt;
Lustre: accfs1-MDT0000: Will be in recovery for at least 1:00, or until 2 clients reconnect&lt;br/&gt;
LustreError: 11488:0:(mdt_open.c:1497:mdt_reint_open()) @@@ OPEN &amp;amp; CREAT not in open replay/by_fid.  req@ffff880322817000 x1456290811566604/t0(8589934684) o101-&amp;gt;83ac397d-ea05-8390-b7e2-d950b803713b@192.168.21.6@o2ib1:0/0 lens 568/1136 e 0 to 0 dl 1388882068 ref 1 fl Interpret:/4/0 rc 0/0&lt;br/&gt;
LustreError: 11488:0:(mdt_open.c:1497:mdt_reint_open()) @@@ OPEN &amp;amp; CREAT not in open replay/by_fid.  req@ffff880320d09800 x1456290811566608/t0(8589934685) o101-&amp;gt;83ac397d-ea05-8390-b7e2-d950b803713b@192.168.21.6@o2ib1:0/0 lens 568/1136 e 0 to 0 dl 1388882068 ref 1 fl Interpret:/4/0 rc 0/0&lt;br/&gt;
Lustre: accfs1-MDT0000: Recovery over after 0:02, of 2 clients 2 recovered and 0 were evicted.&lt;br/&gt;
LustreError: 11540:0:(osd_handler.c:2609:osd_declare_object_ref_del()) ASSERTION( dt_object_exists(dt) &amp;amp;&amp;amp; !dt_object_remote(dt) ) failed:&lt;br/&gt;
LustreError: 11540:0:(osd_handler.c:2609:osd_declare_object_ref_del()) LBUG&lt;br/&gt;
Pid: 11540, comm: tgt_recov&lt;/p&gt;

&lt;p&gt;Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa07a9895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa07a9e97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa1065289&amp;gt;&amp;#93;&lt;/span&gt; osd_declare_object_ref_del+0x99/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_ldiskfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa121f06b&amp;gt;&amp;#93;&lt;/span&gt; lod_declare_ref_del+0x3b/0xd0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lod&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f096e2&amp;gt;&amp;#93;&lt;/span&gt; orph_declare_index_delete+0x112/0x2e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f09f29&amp;gt;&amp;#93;&lt;/span&gt; __mdd_orphan_cleanup+0x679/0xca0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0f1937d&amp;gt;&amp;#93;&lt;/span&gt; mdd_recovery_complete+0xed/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdd&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa114acb5&amp;gt;&amp;#93;&lt;/span&gt; mdt_postrecov+0x35/0xd0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa114c178&amp;gt;&amp;#93;&lt;/span&gt; mdt_obd_postrecov+0x78/0x90 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a49530&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_reprocess_res+0x0/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a448ee&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_reprocess_all_ns+0x3e/0x110 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a59c54&amp;gt;&amp;#93;&lt;/span&gt; target_recovery_thread+0xc74/0x1970 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a58fe0&amp;gt;&amp;#93;&lt;/span&gt; ? target_recovery_thread+0x0/0x1970 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c0ca&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;/p&gt;

&lt;p&gt;This failure occurs only after a failure in the lfsck tests; if the lfsck tests do not run (and fail), this test passes.&lt;br/&gt;
performance-sanity may run successfully between the two with no impact on the crash.  The lfsck tests conclude with:&lt;/p&gt;

&lt;p&gt;I/O read: 8MB, write: 0MB, rate: 21.07MB/s&lt;br/&gt;
 lfsck : @@@@@@ FAIL: e2fsck -d -v -t -t -f -n --mdsdb /data1/cluster-tools/out/1041154/20140107.125659/mdsdb /dev/sdc returned 4, should be &amp;lt;= 1&lt;/p&gt;


&lt;p&gt;I have a pile of the resulting cores and logs, and can make them available.&lt;/p&gt;</comment>
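<!--
Editor's note: in the FAIL line above, e2fsck exiting with 4 means "file
system errors left uncorrected" per e2fsck(8); with the -n flag nothing can
be fixed, so the harness treats any status above 1 as corruption on the MDT,
which fits orphan cleanup later tripping over inconsistent objects. A small
decoder for the (bitmask) exit status, as a sketch; the status value 4 is the
one from the log:

#include <stdio.h>

static const struct { int bit; const char *meaning; } e2fsck_bits[] = {
        { 1,   "file system errors corrected" },
        { 2,   "system should be rebooted" },
        { 4,   "file system errors left uncorrected" },
        { 8,   "operational error" },
        { 16,  "usage or syntax error" },
        { 32,  "e2fsck canceled by user request" },
        { 128, "shared-library error" },
};

int main(void)
{
        int status = 4;                 /* value reported in the log */
        for (size_t i = 0; i < sizeof(e2fsck_bits) / sizeof(e2fsck_bits[0]); i++)
                if (status & e2fsck_bits[i].bit)
                        printf("bit %d: %s\n", e2fsck_bits[i].bit,
                               e2fsck_bits[i].meaning);
        return 0;
}
-->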
                            <comment id="264200" author="adilger" created="Thu, 27 Feb 2020 23:56:10 +0000"  >&lt;p&gt;Close old bug that hasn&apos;t been seen in a long time.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="26238">LU-5565</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                    <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvj7j:</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6826</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>