<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:14:48 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8119] conf-sanity test_47: timeout</title>
                <link>https://jira.whamcloud.com/browse/LU-8119</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for liuying &amp;lt;emoly.liu@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/ff7c62c8-13ad-11e6-9e5d-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/ff7c62c8-13ad-11e6-9e5d-5254006e85c2&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_47 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;test failed to respond and timed out
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Please provide additional information about the failure here.&lt;/p&gt;

&lt;p&gt;MDS console showed:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;06:28:40:Call Trace:
06:28:40: [&amp;lt;ffffffffa0c92a8c&amp;gt;] osd_trans_commit_cb+0x17c/0x3c0 [osd_zfs]
06:28:40: [&amp;lt;ffffffffa021686d&amp;gt;] dmu_tx_do_callbacks+0x3d/0x60 [zfs]
06:28:40: [&amp;lt;ffffffffa0258b58&amp;gt;] txg_do_callbacks+0x18/0x40 [zfs]
06:28:40: [&amp;lt;ffffffffa019d727&amp;gt;] taskq_thread+0x1e7/0x3f0 [spl]
06:28:40: [&amp;lt;ffffffff81067670&amp;gt;] ? default_wake_function+0x0/0x20
06:28:40: [&amp;lt;ffffffffa019d540&amp;gt;] ? taskq_thread+0x0/0x3f0 [spl]
06:28:40: [&amp;lt;ffffffff810a138e&amp;gt;] kthread+0x9e/0xc0
06:28:40: [&amp;lt;ffffffff8100c28a&amp;gt;] child_rip+0xa/0x20
06:28:40: [&amp;lt;ffffffff810a12f0&amp;gt;] ? kthread+0x0/0xc0
06:28:40: [&amp;lt;ffffffff8100c280&amp;gt;] ? child_rip+0x0/0x20
06:28:40:Code: 48 8b 0f 48 8b 41 48 48 8d 51 48 48 39 d0 48 8d 58 d8 74 2e 0f 1f 80 00 00 00 00 48 8b 43 10 48 85 c0 74 0d 48 8b 73 18 4c 89 e7 &amp;lt;ff&amp;gt; d0 49 8b 0c 24 48 8b 43 28 48 8d 51 48 48 39 d0 48 8d 58 d8 
06:28:40:RIP  [&amp;lt;ffffffffa136d550&amp;gt;] dt_txn_hook_commit+0x40/0x60 [obdclass]
06:28:40: RSP &amp;lt;ffff88005a2f3d80&amp;gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Info required for matching: conf-sanity 47&lt;/p&gt;</description>
                <environment></environment>
        <key id="36810">LU-8119</key>
            <summary>conf-sanity test_47: timeout</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="tappro">Mikhail Pershin</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Tue, 10 May 2016 03:14:57 +0000</created>
                <updated>Wed, 19 Apr 2017 12:24:37 +0000</updated>
                            <resolved>Wed, 19 Apr 2017 12:24:37 +0000</resolved>
                                                    <fixVersion>Lustre 2.10.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="152231" author="jamesanunez" created="Fri, 13 May 2016 15:51:24 +0000"  >&lt;p&gt;There&apos;s a similar stack trace for a conf-sanity test 44 timeout at &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/ecc6376e-1892-11e6-855a-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/ecc6376e-1892-11e6-855a-5254006e85c2&lt;/a&gt;. From the MDS console log:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;11:49:02:[10785.693635] Lustre: DEBUG MARKER: umount -d -f /mnt/mds1
11:49:02:[10786.476837] general protection fault: 0000 [#1] SMP 
11:49:02:[10786.477004] Modules linked in: osd_zfs(OE) lustre(OE) ofd(OE) osp(OE) lod(OE) ost(OE) mdt(OE) mdd(OE) mgs(OE) lquota(OE) lfsck(OE) obdecho(OE) mgc(OE) lov(OE) osc(OE) mdc(OE) lmv(OE) fid(OE) fld(OE) ptlrpc_gss(OE) ptlrpc(OE) obdclass(OE) ksocklnd(OE) lnet(OE) libcfs(OE) sha512_generic crypto_null dm_mod rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache xprtrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod crc_t10dif crct10dif_generic crct10dif_common ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ppdev pcspkr virtio_balloon i2c_piix4 parport_pc parport zfs(POE) zunicode(POE) zavl(POE) zcommon(POE) znvpair(POE) nfsd nfs_acl lockd grace spl(OE) auth_rpcgss zlib_deflate sunrpc ip_tables ext4 mbcache jbd2 ata_generic pata_acpi cirrus ata_piix syscopyarea 8139too virtio_blk sysfillrect sysimgblt drm_kms_helper ttm drm floppy libata virtio_pci i2c_core serio_raw virtio_ring 8139cp virtio mii [last unloaded: libcfs]
11:49:02:[10786.477004] CPU: 0 PID: 6194 Comm: tx_commit_cb Tainted: P           OE  ------------   3.10.0-327.13.1.el7_lustre.x86_64 #1
11:49:02:[10786.477004] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007
11:49:02:[10786.477004] task: ffff88007a155080 ti: ffff88004d714000 task.ti: ffff88004d714000
11:49:02:[10786.477004] RIP: 0010:[&amp;lt;ffffffffa0ae0910&amp;gt;]  [&amp;lt;ffffffffa0ae0910&amp;gt;] dt_txn_hook_commit+0x30/0x60 [obdclass]
11:49:02:[10786.477004] RSP: 0018:ffff88004d717d88  EFLAGS: 00010207
11:49:02:[10786.477004] RAX: 5a5a5a5a5a5a5a5a RBX: 5a5a5a5a5a5a5a32 RCX: ffff88004e49c000
11:49:02:[10786.477004] RDX: ffff88004e49c048 RSI: ffffffffa13485c0 RDI: ffff880053a15810
11:49:02:[10786.477004] RBP: ffff88004d717d98 R08: 20737365636f7250 R09: 0a64657265746e65
11:49:02:[10786.477004] R10: ffff880054140b1f R11: 0a64657265746e65 R12: ffff880053a15800
11:49:02:[10786.477004] R13: 0000000000000000 R14: ffff880053a15850 R15: ffff88004e49c000
11:49:02:[10786.477004] FS:  0000000000000000(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000
11:49:02:[10786.477004] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
11:49:02:[10786.477004] CR2: 00007f348ade1000 CR3: 000000007b9e0000 CR4: 00000000000006f0
11:49:02:[10786.477004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
11:49:02:[10786.477004] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
11:49:02:[10786.477004] Stack:
11:49:02:[10786.477004]  ffff880052c72ec0 ffff880053a15800 ffff88004d717dd0 ffffffffa14bf75d
11:49:02:[10786.477004]  ffff880052c72ec0 ffff880052c72e10 0000000000000000 ffff880052c72e00
11:49:02:[10786.477004]  ffff88004db7dd00 ffff88004d717e00 ffffffffa03d1884 ffff880052c72e00
11:49:02:[10786.477004] Call Trace:
11:49:02:[10786.477004]  [&amp;lt;ffffffffa14bf75d&amp;gt;] osd_trans_commit_cb+0xed/0x440 [osd_zfs]
11:49:02:[10786.477004]  [&amp;lt;ffffffffa03d1884&amp;gt;] dmu_tx_do_callbacks+0x44/0x70 [zfs]
11:49:02:[10786.477004]  [&amp;lt;ffffffffa04182c4&amp;gt;] txg_do_callbacks+0x14/0x30 [zfs]
11:49:02:[10786.477004]  [&amp;lt;ffffffffa02886de&amp;gt;] taskq_thread+0x21e/0x430 [spl]
11:49:02:[10786.477004]  [&amp;lt;ffffffff810b8c00&amp;gt;] ? wake_up_state+0x20/0x20
11:49:02:[10786.477004]  [&amp;lt;ffffffffa02884c0&amp;gt;] ? taskq_thread_spawn+0x60/0x60 [spl]
11:49:02:[10786.477004]  [&amp;lt;ffffffff810a5acf&amp;gt;] kthread+0xcf/0xe0
11:49:02:[10786.477004]  [&amp;lt;ffffffff810a5a00&amp;gt;] ? kthread_create_on_node+0x140/0x140
11:49:02:[10786.477004]  [&amp;lt;ffffffff81646018&amp;gt;] ret_from_fork+0x58/0x90
11:49:02:[10786.477004]  [&amp;lt;ffffffff810a5a00&amp;gt;] ? kthread_create_on_node+0x140/0x140
11:49:02:[10786.477004] Code: 55 48 89 e5 41 54 49 89 fc 53 f6 47 48 02 75 42 48 8b 0f 48 8b 41 48 48 8d 51 48 48 39 d0 48 8d 58 d8 74 2e 0f 1f 80 00 00 00 00 &amp;lt;48&amp;gt; 8b 43 10 48 85 c0 74 0d 48 8b 73 18 4c 89 e7 ff d0 49 8b 0c 
11:49:02:[10786.477004] RIP  [&amp;lt;ffffffffa0ae0910&amp;gt;] dt_txn_hook_commit+0x30/0x60 [obdclass]
11:49:02:[10786.477004]  RSP &amp;lt;ffff88004d717d88&amp;gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="176858" author="tappro" created="Wed, 7 Dec 2016 16:11:05 +0000"  >&lt;p&gt;I saw this issue regularly in my tests and have a patch for it. The problem is this code in osd_trans_commit_cb():&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&#160;&lt;span class=&quot;code-comment&quot;&gt;/* call per-transaction callbacks &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; any */&lt;/span&gt;
 list_for_each_entry_safe(dcb, tmp, &amp;amp;oh-&amp;gt;ot_dcb_list, dcb_linkage)
         dcb-&amp;gt;dcb_func(NULL, th, dcb, error);

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;It is missing a list_del_init() call before the dcb_func() call. The dcb entry can be destroyed by dcb_func() and must be removed from the list prior to that.&lt;/p&gt;

&lt;p&gt;Interestingly, there is no such problem with the ldiskfs OSD; the bug exists only in the ZFS OSD code.&lt;/p&gt;

&lt;p&gt;I will push a patch shortly.&lt;/p&gt;</comment>
                            <comment id="176867" author="gerrit" created="Wed, 7 Dec 2016 16:29:29 +0000"  >&lt;p&gt;Mike Pershin (mike.pershin@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/24202&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24202&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8119&quot; title=&quot;conf-sanity test_47: timeout&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8119&quot;&gt;&lt;del&gt;LU-8119&lt;/del&gt;&lt;/a&gt; osd: remove dcb entry from the list prior processing.&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 1b51c67568583be3d70506723c4ac8fa97176dcd&lt;/p&gt;</comment>
                            <comment id="176868" author="tappro" created="Wed, 7 Dec 2016 16:33:52 +0000"  >&lt;p&gt;I suppose that this bug is quite rare because in most cases there is only one commit callback, but from JIRA I see that it happens from time to time and the tickets are simply closed when the bug has not appeared for some time. Even so, it is a critical bug.&lt;/p&gt;</comment>
                            <comment id="179008" author="tappro" created="Sat, 24 Dec 2016 09:33:42 +0000"  >&lt;p&gt;I still see several variants of this issue when applying high load to the OSD and then unmounting it. Alex reported that the same issues are happening in his tests.&lt;/p&gt;</comment>
                            <comment id="192613" author="gerrit" created="Wed, 19 Apr 2017 04:43:07 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/24202/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24202/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8119&quot; title=&quot;conf-sanity test_47: timeout&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8119&quot;&gt;&lt;del&gt;LU-8119&lt;/del&gt;&lt;/a&gt; osd: remove dcb entry from the list prior processing.&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: aa64476cef7d5f0050de20839925868013caa0b6&lt;/p&gt;</comment>
                            <comment id="192660" author="pjones" created="Wed, 19 Apr 2017 12:24:37 +0000"  >&lt;p&gt;Landed for 2.10&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzyb0n:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>