<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:49:38 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12097] general protection fault in osd_idc_find_and_init</title>
                <link>https://jira.whamcloud.com/browse/LU-12097</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;An lctl issued on the MDS triggered a crash.  Probably lctl changelog_deregister.  From the console log:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[1309661.423362] general protection fault: 0000 [#1] SMP
[1309661.429116] Modules linked in: osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgc(OE) osd_zfs(OE) lquota(OE) fid(OE) fld(OE) ptlrpc(OE) obdclass(OE) ko2iblnd(OE) lnet(OE) libcfs(OE) ib_ucm iw_cxgb4 iw_cxgb3 rpcrdma rdma_ucm ib_umad ib_uverbs ib_ipoib ib_iser rdma_cm iw_cm ib_cm iTCO_wdt iTCO_vendor_support mlx4_ib ib_core mlx4_en sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass joydev pcspkr dm_round_robin i2c_i801 ioatdma lpc_ich ses enclosure mlx4_core sg devlink ipmi_si ipmi_devintf ipmi_msghandler acpi_cpufreq sch_fq_codel zfs(POE) binfmt_misc zunicode(POE) zavl(POE) icp(POE) zcommon(POE) znvpair(POE) spl(OE) msr_safe(OE) ip_tables nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache overlay(T) ext4 mbcache jbd2 dm_service_time sd_mod crc_t10dif crct10dif_generic
[1309661.510303]  be2iscsi bnx2i cnic uio cxgb4i cxgb4 cxgb3i crct10dif_pclmul cxgb3 crct10dif_common crc32_pclmul crc32c_intel mgag200 ghash_clmulni_intel mdio 8021q drm_kms_helper syscopyarea garp libcxgbi sysfillrect aesni_intel sysimgblt mrp fb_sys_fops ttm lrw stp gf128mul llc libcxgb glue_helper ablk_helper drm qla4xxx cryptd isci ahci iscsi_boot_sysfs igb libsas libahci dca mpt2sas ptp libata drm_panel_orientation_quirks pps_core i2c_algo_bit raid_class scsi_transport_sas wmi dm_multipath sunrpc dm_mirror dm_region_hash dm_log dm_mod iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi
CPU: 4 PID: 113823 Comm: lctl Kdump: loaded Tainted: P           OE  ------------ T 3.10.0-957.5.1.3chaos.ch6.x86_64 #1
Hardware name: appro gb812x-mds-llnl/S2600JF, BIOS SE5C600.86B.02.06.0002.101320150901 10/13/2015
task: ffff95c37d6d1040 ti: ffff95d09fe30000 task.ti: ffff95d09fe30000
RIP: 0010:[&amp;lt;ffffffffc179fa6c&amp;gt;]  [&amp;lt;ffffffffc179fa6c&amp;gt;] osd_idc_find_and_init+0x3c/0x80 [osd_zfs]
RSP: 0018:ffff95d09fe336d8  EFLAGS: 00010206
RAX: ffff95d937a80820 RBX: ffff95d8ff0184e0 RCX: 0000ffffffffffff
RDX: 5a5a5a5a5a5a5a5a RSI: ffffffffc17b18e0 RDI: ffff95d09fe33d58
RBP: ffff95d09fe336f8 R08: 0000000000025c40 R09: ffff95d8ff01b810
R10: 0000000000003e00 R11: ffffcc545f8d4e00 R12: ffff95d09fe33d58
R13: ffff95db04b98000 R14: ffff95d8ff019c08 R15: ffff95d8ff018568
FS:  00002aaaaab111c0(0000) GS:ffff95cb3e100000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f6b9db3bdb8 CR3: 0000000deb844000 CR4: 00000000000607e0
Call Trace:
 [&amp;lt;ffffffffc1793f79&amp;gt;] osd_declare_destroy+0x309/0x410 [osd_zfs]
 [&amp;lt;ffffffffc1303571&amp;gt;] llog_osd_declare_destroy+0x331/0x650 [obdclass]
 [&amp;lt;ffffffffc12ecd64&amp;gt;] llog_declare_destroy+0x54/0x190 [obdclass]
 [&amp;lt;ffffffffc12ef9c8&amp;gt;] llog_cancel_rec+0x108/0x880 [obdclass]
 [&amp;lt;ffffffffc12f6840&amp;gt;] llog_cat_cancel_records+0x1d0/0x3d0 [obdclass]
 [&amp;lt;ffffffffc19d1784&amp;gt;] llog_changelog_cancel_cb+0xe4/0x1d0 [mdd]
 [&amp;lt;ffffffffc12f09bb&amp;gt;] llog_process_thread+0x87b/0x1470 [obdclass]
 [&amp;lt;ffffffffc1314707&amp;gt;] ? lprocfs_oh_tally+0x17/0x50 [obdclass]
 [&amp;lt;ffffffffc19d16a0&amp;gt;] ? mdd_hsm_actions_llog_fini+0x1a0/0x1a0 [mdd]
 [&amp;lt;ffffffffc12f166c&amp;gt;] llog_process_or_fork+0xbc/0x450 [obdclass]
 [&amp;lt;ffffffffc12f6e7d&amp;gt;] llog_cat_process_cb+0x43d/0x4e0 [obdclass]
 [&amp;lt;ffffffffc12f09bb&amp;gt;] llog_process_thread+0x87b/0x1470 [obdclass]
 [&amp;lt;ffffffffc12f6a40&amp;gt;] ? llog_cat_cancel_records+0x3d0/0x3d0 [obdclass]
 [&amp;lt;ffffffffc12f166c&amp;gt;] llog_process_or_fork+0xbc/0x450 [obdclass]
 [&amp;lt;ffffffffc12f6a40&amp;gt;] ? llog_cat_cancel_records+0x3d0/0x3d0 [obdclass]
 [&amp;lt;ffffffffc12f5f19&amp;gt;] llog_cat_process_or_fork+0x199/0x2a0 [obdclass]
 [&amp;lt;ffffffffc19d16a0&amp;gt;] ? mdd_hsm_actions_llog_fini+0x1a0/0x1a0 [mdd]
 [&amp;lt;ffffffffc12f604e&amp;gt;] llog_cat_process+0x2e/0x30 [obdclass]
 [&amp;lt;ffffffffc19d0a48&amp;gt;] llog_changelog_cancel+0x58/0x1d0 [mdd]
 [&amp;lt;ffffffffc19d24e0&amp;gt;] ? mdd_changelog_write_header+0x40/0x4b0 [mdd]
 [&amp;lt;ffffffffc12f7a07&amp;gt;] llog_cancel+0x57/0x250 [obdclass]
 [&amp;lt;ffffffffc19d2b59&amp;gt;] mdd_changelog_llog_cancel+0xd9/0x270 [mdd]
 [&amp;lt;ffffffffc19d5edf&amp;gt;] mdd_iocontrol+0x13af/0x16f0 [mdd]
 [&amp;lt;ffffffffc18968dc&amp;gt;] mdt_iocontrol+0x5ec/0xb00 [mdt]
 [&amp;lt;ffffffffc1305789&amp;gt;] class_handle_ioctl+0x19a9/0x1e50 [obdclass]
 [&amp;lt;ffffffffb15f6c8d&amp;gt;] ? handle_mm_fault+0x39d/0x9b0
 [&amp;lt;ffffffffb170d1fe&amp;gt;] ? security_capable+0x1e/0x20
 [&amp;lt;ffffffffc12ea5d2&amp;gt;] obd_class_ioctl+0xd2/0x170 [obdclass]
 [&amp;lt;ffffffffb1664b60&amp;gt;] do_vfs_ioctl+0x3d0/0x600
 [&amp;lt;ffffffffb1b936fb&amp;gt;] ? __do_page_fault+0x23b/0x550
 [&amp;lt;ffffffffb1664e31&amp;gt;] SyS_ioctl+0xa1/0xc0
 [&amp;lt;ffffffffb1b98f5b&amp;gt;] system_call_fastpath+0x22/0x27
 [&amp;lt;ffffffffb1b98ea1&amp;gt;] ? system_call_after_swapgs+0xae/0x146
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Crash occurred on MDT0001 (2 MDS system)&lt;br/&gt;
zfs-0.7.11-5llnl.ch6&lt;br/&gt;
lustre-2.10.6_2.chaos&lt;br/&gt;
See &lt;a href=&quot;https://github.com/llnl/lustre&quot;&gt;https://github.com/llnl/lustre&lt;/a&gt; and &lt;a href=&quot;https://github.com/llnl/zfs&quot;&gt;https://github.com/llnl/zfs&lt;/a&gt;&lt;br/&gt;
Linux version 3.10.0-957.5.1.3chaos.ch6.x86_64&lt;br/&gt;
RHEL 7.6 based distro</environment>
        <key id="55218">LU-12097</key>
            <summary>general protection fault in osd_idc_find_and_init</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="4">Incomplete</resolution>
                                        <assignee username="sebastien">Sebastien Buisson</assignee>
                                    <reporter username="ofaaland">Olaf Faaland</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Thu, 21 Mar 2019 22:51:02 +0000</created>
                <updated>Mon, 10 Jun 2019 21:59:32 +0000</updated>
                            <resolved>Sat, 18 May 2019 11:26:42 +0000</resolved>
                                    <version>Lustre 2.10.6</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="244466" author="ofaaland" created="Thu, 21 Mar 2019 22:52:01 +0000"  >&lt;p&gt;For our own tracking: local ticket is TOSS-4474&lt;/p&gt;</comment>
                            <comment id="244563" author="pjones" created="Fri, 22 Mar 2019 22:43:57 +0000"  >&lt;p&gt;S&#233;bastien&lt;/p&gt;

&lt;p&gt;Could you please advise here?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="244635" author="sebastien" created="Mon, 25 Mar 2019 16:54:07 +0000"  >&lt;p&gt;Would you by any chance be able to share a crash dump?&lt;/p&gt;

&lt;p&gt;My impression is, if this problem relates to other ticket &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12098&quot; title=&quot;changelog_deregister appears not to reliably clear all changelog entries&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12098&quot;&gt;&lt;del&gt;LU-12098&lt;/del&gt;&lt;/a&gt;, that the catalog file is incoherent, so when Changelog entries need to be freed, the llog files to which the catalog is pointing to are not valid.&lt;/p&gt;

&lt;p&gt;Could you try to read the catalog file with llog_reader?&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;llog_reader /zfs_mount/changelog_catalog
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It should output something like this:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Header size : 8192
Time : Mon Mar 25 15:36:53 2019
Number of records: 4
Target uuid :
-----------------------
#01 (064)id=[0x6:0x1:0x0]:0 path=oi.1/0x1:0x6:0x0
#02 (064)id=[0x7:0x1:0x0]:0 path=oi.1/0x1:0x7:0x0
#03 (064)id=[0x8:0x1:0x0]:0 path=oi.1/0x1:0x8:0x0
#04 (064)id=[0x9:0x1:0x0]:0 path=oi.1/0x1:0x9:0x0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Then it would be interesting to see if all the llog files referenced as path in the catalog file do exist, and if they do contain valid changelog records that can be read with llog_reader.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;for i in 6 7 8 9; do llog_reader /zfs_mount/oi.1/0x1:0x${i}:0x0; done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Thanks.&lt;/p&gt;</comment>
                            <comment id="244642" author="ofaaland" created="Mon, 25 Mar 2019 20:31:27 +0000"  >&lt;p&gt;Hi Sebastien,&lt;br/&gt;
That sounds reasonable.  Unfortunately I cannot share a crash dump from this system.  But I will find out whether the changelog_catalog records point to existing objects and whether those objects contain valid changelog records.&lt;br/&gt;
Thanks!&lt;/p&gt;</comment>
                            <comment id="244852" author="ofaaland" created="Thu, 28 Mar 2019 22:01:00 +0000"  >&lt;p&gt;Hi Sebastien,&lt;br/&gt;
I checked and confirmed that all the changelog_catalog records point to existing llogs.  I confirmed that for each llog file, the records are all of type &apos;changelog&apos;, according to the output of llog_reader.  So if they are incoherent, it&apos;s not in an obvious way.  &lt;/p&gt;

&lt;p&gt;The MDT that crashed with this stack has 2911 entries in changelog_catalog.  The llog with the fewest records has 11590; the llog with the most records has 64101.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;----------------
eporter82
----------------
Changelog_catalog header:
Header size : 8192
Number of records: 2911

Missing changelog files:  0
Existing changelog files:  2911
checking oi.1/0x1:0x2e14f:0x0
checking oi.1/0x1:0x2e150:0x0
&amp;lt;... 2909 rows removed ...&amp;gt;
all files checked

MAX 64101 MIN 11590 TOT 185533101 AVG 63735
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="245534" author="ofaaland" created="Wed, 10 Apr 2019 17:19:46 +0000"  >&lt;p&gt;Hi Sebastien,&lt;br/&gt;
This system was one of the two on which I performed the register/de-register to clear orphaned changelog entries.  There were no errors or crashes.&lt;/p&gt;</comment>
                            <comment id="245560" author="sebastien" created="Thu, 11 Apr 2019 06:19:02 +0000"  >&lt;p&gt;That is good to hear, thanks.&lt;/p&gt;</comment>
                            <comment id="246646" author="sebastien" created="Fri, 3 May 2019 15:08:48 +0000"  >&lt;p&gt;So, I think that the issue is now fixed on site, and the actual fix in Lustre has been pushed to Gerrit under &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12098&quot; title=&quot;changelog_deregister appears not to reliably clear all changelog entries&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12098&quot;&gt;&lt;del&gt;LU-12098&lt;/del&gt;&lt;/a&gt; via patch &lt;a href=&quot;https://review.whamcloud.com/34688&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34688&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;Please correct me if I am wrong.&lt;/p&gt;</comment>
                            <comment id="246670" author="ofaaland" created="Fri, 3 May 2019 19:01:31 +0000"  >&lt;blockquote&gt;&lt;p&gt;So, I think that the issue is now fixed on site, and the actual fix in Lustre has been pushed to Gerrit under&#160;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12098&quot; title=&quot;changelog_deregister appears not to reliably clear all changelog entries&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12098&quot;&gt;&lt;del&gt;LU-12098&lt;/del&gt;&lt;/a&gt;&#160;via patch&#160;&lt;a href=&quot;https://review.whamcloud.com/34688&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34688&lt;/a&gt;.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;I do not see in the ticket that we ever determined why osd_idc_find_and_init() attempted to dereference an invalid pointer.&lt;/p&gt;

&lt;p&gt;There was speculation that the changelog_catalog was incoherent, but I was able to read the catalog with llog_reader, confirm that it pointed to existing llog files, and read those llog files with llog_reader to confirm they contained changelog type records.&#160; So I think that rules out the incoherency theory.&lt;/p&gt;

&lt;p&gt;But maybe I missed something - if so, let me know.&#160; Thanks!&lt;/p&gt;</comment>
                            <comment id="246746" author="sebastien" created="Mon, 6 May 2019 16:34:44 +0000"  >&lt;p&gt;Correct, I have not been able to find the origin of the invalid pointer dereference by osd_idc_find_and_init(). I was not able to reproduce locally, and unfortunately you are not able to share your crash dump.&lt;/p&gt;

&lt;p&gt;As I read this ticket again, I realize that this is in fact the &apos;lctl changelog_deregister&apos; command that initially triggered the crash, and then left &quot;orphaned&quot; changelog entries. So I agree it is most likely that the crash is not due to an incoherent changelog. I need to look again at the code to see if I can see something odd.&lt;br/&gt;
By any chance, would you be able to exploit your crash dump so that you can find out the exact line of code from which the GPF was generated?&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="246762" author="ofaaland" created="Mon, 6 May 2019 21:55:35 +0000"  >&lt;p&gt;Hi Sebastien,&lt;/p&gt;

&lt;p&gt;I get the following from inspecting the kmods with gdb.&#160; From zfs-osd:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;(gdb) l *(osd_idc_find_and_init+0x3c)
0x14a9c is in osd_idc_find_and_init (/usr/src/debug/lustre-2.10.6_2.chaos/lustre/osd-zfs/osd_oi.c:896).
891             idc = osd_idc_add(env, osd, fid);
892             if (IS_ERR(idc))
893                     return PTR_ERR(idc);
894
895             if (obj-&amp;gt;oo_dn)
896                     idc-&amp;gt;oic_dnode = obj-&amp;gt;oo_dn-&amp;gt;dn_object;
897
898             return 0;
899     }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;and&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;(gdb) l *(osd_declare_destroy+0x309)
0x8fa9 is in osd_declare_destroy (/usr/src/debug/lustre-2.10.6_2.chaos/lustre/osd-zfs/osd_object.c:603).
598
599             /* will help to find FID-&amp;gt;ino when this object is being
600              * added to PENDING/ */
601             osd_idc_find_and_init(env, osd, obj);
602
603             RETURN(0);
604     }
605
606     static int osd_destroy(const struct lu_env *env, struct dt_object *dt,
607                            struct thandle *th)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;and below that in obdclass:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;(gdb) l *(llog_osd_declare_destroy+0x331)
0x195a1 is in llog_osd_declare_destroy (/usr/src/debug/lustre-2.10.6_2.chaos/lustre/obdclass/llog_osd.c:1722).
1717            rc = dt_declare_ref_del(env, o, th);
1718            if (rc &amp;lt; 0)
1719                    GOTO(out_put, rc);
1720
1721            rc = dt_declare_destroy(env, o, th);
1722            if (rc &amp;lt; 0)
1723                    GOTO(out_put, rc);
1724
1725            if (loghandle-&amp;gt;lgh_ctxt-&amp;gt;loc_flags &amp;amp; LLOG_CTXT_FLAG_NORMAL_FID) {
1726                    rc = llog_osd_regular_fid_del_name_entry(env, o, th, true);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;and&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;(gdb) l *(llog_declare_destroy+0x54)
0x2d94 is in llog_declare_destroy (/usr/src/debug/lustre-2.10.6_2.chaos/libcfs/include/libcfs/libcfs_debug.h:219).
214     /**
215      * Filters out logging messages based on mask and subsystem.
216      */
217     static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
218     {
219             return mask &amp;amp; D_CANTMASK ||
220                     ((libcfs_debug &amp;amp; mask) &amp;amp;&amp;amp; (libcfs_subsystem_debug &amp;amp; subsystem));
221     }
222
223     #  define __CDEBUG(cdls, mask, format, ...)                             \
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;and&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;(gdb) l *(llog_cancel_rec+0x108)
0x59f8 is in llog_cancel_rec (/usr/src/debug/lustre-2.10.6_2.chaos/lustre/obdclass/llog.c:231).
226             if (rc &amp;lt; 0)
227                     GOTO(out_trans, rc);
228
229             if ((llh-&amp;gt;llh_flags &amp;amp; LLOG_F_ZAP_WHEN_EMPTY)) {
230                     rc = llog_declare_destroy(env, loghandle, th);
231                     if (rc &amp;lt; 0)
232                             GOTO(out_trans, rc);
233             }
234
235             th-&amp;gt;th_wait_submit = 1;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="246974" author="sebastien" created="Fri, 10 May 2019 14:58:41 +0000"  >&lt;p&gt;Thanks Olaf, this is really helpful.&lt;/p&gt;

&lt;p&gt;It seems the problem stems from the data structure pointed to by obj in osd_idc_find_and_init, that is being corrupted.&lt;br/&gt;
Strangely, before crashing in a call from osd_declare_destroy, osd_idc_find_and_init is called with the same third parameter by osd_declare_ref_del, and the 2 calls are very consecutive in llog_osd_declare_destroy.&lt;br/&gt;
So the data structure has been corrupted in between. It could be the sign that this object has been destroyed in the meantime.&lt;br/&gt;
One theory I have is a changelog deregister carried out while a changelog clear is already in progress, or 2 concurrent changelog clear or changelog deregister.&lt;/p&gt;

&lt;p&gt;In order to find out if the object is already freed, would you be able to dump the content of the obj variable as a struct osd_object? I am interested in the value of the oo_destroyed field in particular.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
Sebastien.&lt;/p&gt;</comment>
                            <comment id="247020" author="ofaaland" created="Fri, 10 May 2019 23:24:20 +0000"  >&lt;p&gt;Hi Sebastien,&lt;/p&gt;

&lt;p&gt;I&apos;m unable to get that struct dump for you, sorry.&#160; The core dump was not written completely because Pacemaker powered the node off while kdump was writing it out.&lt;/p&gt;

&lt;p&gt;Sounds like we may have gone as far with this as we can - if so feel free to close the ticket.&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="247373" author="pjones" created="Sat, 18 May 2019 11:26:42 +0000"  >&lt;p&gt;Seems like we&apos;re stuck on this one for the time being&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00dp3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>