<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:54:55 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12704] racer test_1: Invalid layout: The component end must be aligned by the stripe size</title>
                <link>https://jira.whamcloud.com/browse/LU-12704</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for jianyu &amp;lt;yujian@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/4c310714-c728-11e9-9fc9-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/4c310714-c728-11e9-9fc9-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_1 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;layout: raid0 raid0 pfl pfl pfl dom dom dom flr flr flr
Invalid layout: The component end must be aligned by the stripe size
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;&amp;lt;&amp;lt;Please provide additional information about the failure here&amp;gt;&amp;gt;&lt;/p&gt;





&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
racer test_1 - Timeout occurred after 833 mins, last suite running was racer, restarting cluster to continue tests&lt;/p&gt;</description>
                <environment></environment>
        <key id="56768">LU-12704</key>
            <summary>racer test_1: Invalid layout: The component end must be aligned by the stripe size</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="vsaveliev">Vladimir Saveliev</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Tue, 27 Aug 2019 05:07:28 +0000</created>
                <updated>Wed, 23 Dec 2020 10:44:48 +0000</updated>
                            <resolved>Tue, 12 Nov 2019 06:39:45 +0000</resolved>
                                    <version>Lustre 2.13.0</version>
                                    <fixVersion>Lustre 2.14.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="254642" author="adilger" created="Thu, 12 Sep 2019 22:57:43 +0000"  >&lt;p&gt;Andreas Dilger (adilger@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/36174&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36174&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12704&quot; title=&quot;racer test_1: Invalid layout: The component end must be aligned by the stripe size&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12704&quot;&gt;&lt;del&gt;LU-12704&lt;/del&gt;&lt;/a&gt; tests: component end must be multiple of stripesize&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 75b596e686209cd3dc7f4add49a7c57596c6f1f1&lt;/p&gt;</comment>
                            <comment id="254644" author="adilger" created="Thu, 12 Sep 2019 23:09:09 +0000"  >&lt;p&gt;Note that on 2.10/2.12 (between commits &lt;tt&gt;v2_9_55_0-14-g89693927f0..v2_12_55-83-gff5eb304fa&lt;/tt&gt;) it isn&apos;t allowed to create files with &lt;tt&gt;stripe_size&lt;/tt&gt; that isn&apos;t a power-of-two value due to:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
                       &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (stripe_size == 0 ||
                           (prev_end != LUSTRE_EOF &amp;amp;&amp;amp;
                            (prev_end &amp;amp; (stripe_size - 1)))) {
                               CDEBUG(D_LAYOUT, &lt;span class=&quot;code-quote&quot;&gt;&quot;stripe size isn&apos;t aligned. &quot;&lt;/span&gt;
                                      &lt;span class=&quot;code-quote&quot;&gt;&quot; stripe_sz: %u, [%llu, %llu)\n&quot;&lt;/span&gt;,
                                      stripe_size, ext-&amp;gt;e_start, prev_end);
                               RETURN(-EINVAL);
                       }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;This was fixed as part of SEL to check only that they are a multiple, not assuming a power-of-two value:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (prev_end % stripe_size) {
                        CDEBUG(D_LAYOUT, &lt;span class=&quot;code-quote&quot;&gt;&quot;stripe size isn&apos;t aligned, &quot;&lt;/span&gt;
                               &lt;span class=&quot;code-quote&quot;&gt;&quot;stripe_sz: %u, [%llu, %llu)\n&quot;&lt;/span&gt;,
                               stripe_size, ext-&amp;gt;e_start, prev_end);
                        RETURN(-EINVAL);
                }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;so if this patch is backported to b2_12 we should include that part of the change, otherwise setstripe will still be failing with random 64KiB-multiple stripe_size.&lt;/p&gt;</comment>
                            <comment id="255394" author="jamesanunez" created="Wed, 25 Sep 2019 18:57:51 +0000"  >&lt;p&gt;We&apos;re seeing racer test 1 crash with the &apos;Invalid layout&apos; messages in the client test_log and the following call trace:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[50194.820231] 2[26110]: segfault at 0 ip           (null) sp 00007ffd1d570c28 error 14 in 2[400000+6000]
[50260.478242] 13[17503]: segfault at 8 ip 00007f5cedcb5718 sp 00007fff9f4f5d70 error 4 in ld-2.17.so[7f5cedcaa000+22000]
[50261.303734] 13[17172]: segfault at 8 ip 00007fc73cbf4718 sp 00007ffffb9ec790 error 4 in ld-2.17.so[7fc73cbe9000+22000]
[50280.191268] 16[9651]: segfault at 8 ip 00007f7cbdbdd718 sp 00007ffe4ab86420 error 4 in ld-2.17.so[7f7cbdbd2000+22000]
[50283.020027] 19[13428]: segfault at 8 ip 00007f9ce34c0718 sp 00007ffc51e417c0 error 4 in ld-2.17.so[7f9ce34b5000+22000]
[50297.598388] general protection fault: 0000 [#1] SMP 
[50297.599253] Modules linked in: lustre(OE) obdecho(OE) mgc(OE) lov(OE) mdc(OE) osc(OE) lmv(OE) fid(OE) fld(OE) ptlrpc_gss(OE) ptlrpc(OE) obdclass(OE) ksocklnd(OE) lnet(OE) libcfs(OE) nfsd nfs_acl brd loop rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod crc_t10dif crct10dif_generic ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core sunrpc iosf_mbi crc32_pclmul ghash_clmulni_intel ppdev aesni_intel lrw gf128mul glue_helper ablk_helper cryptd joydev pcspkr virtio_balloon i2c_piix4 parport_pc parport ip_tables ext4 mbcache jbd2 ata_generic pata_acpi virtio_blk ata_piix libata 8139too crct10dif_pclmul crct10dif_common crc32c_intel
[50297.607808]  serio_raw 8139cp virtio_pci virtio_ring mii virtio floppy [last unloaded: libcfs]
[50297.608709] CPU: 1 PID: 16082 Comm: ldlm_bl_03 Kdump: loaded Tainted: G           OE  ------------   3.10.0-957.27.2.el7.x86_64 #1
[50297.609823] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[50297.610384] task: ffff95157aa35140 ti: ffff95155a928000 task.ti: ffff95155a928000
[50297.611111] RIP: 0010:[&amp;lt;ffffffffc082c368&amp;gt;]  [&amp;lt;ffffffffc082c368&amp;gt;] cl_object_flush+0x48/0x120 [obdclass]
[50297.612168] RSP: 0018:ffff95155a92bb20  EFLAGS: 00010292
[50297.612680] RAX: 5a5a5a5a5a5a5a5a RBX: 5a5a5a5a5a5a5a42 RCX: 0000000000000000
[50297.613366] RDX: ffff95154c051bd8 RSI: 0000000000000246 RDI: 0000000000000246
[50297.614050] RBP: ffff95155a92bb40 R08: 0000000000000000 R09: 0000000180180017
[50297.614735] R10: 0000000000000001 R11: ffffe84bc1bdc2c0 R12: ffff95156f70b430
[50297.615430] R13: ffff951549f8ac68 R14: ffff95155f7ae6c0 R15: 0000000000000000
[50297.616120] FS:  0000000000000000(0000) GS:ffff95157fd00000(0000) knlGS:0000000000000000
[50297.616899] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[50297.617451] CR2: 00007fde9a099f34 CR3: 0000000078754000 CR4: 00000000000606e0
[50297.618146] Call Trace:
[50297.618436]  [&amp;lt;ffffffffc0cde4ec&amp;gt;] lov_flush_composite+0x17c/0x1d0 [lov]
[50297.619091]  [&amp;lt;ffffffffc0cdd2e2&amp;gt;] lov_object_flush+0x22/0x60 [lov]
[50297.619713]  [&amp;lt;ffffffffc082c383&amp;gt;] cl_object_flush+0x63/0x120 [obdclass]
[50297.620440]  [&amp;lt;ffffffffc0d5affe&amp;gt;] ll_lock_cancel_bits+0x3ce/0x9f0 [lustre]
[50297.621121]  [&amp;lt;ffffffffc0d5bd68&amp;gt;] ll_md_blocking_ast+0x248/0x2b0 [lustre]
[50297.621895]  [&amp;lt;ffffffffc0a704aa&amp;gt;] ldlm_cancel_callback+0x8a/0x330 [ptlrpc]
[50297.622789]  [&amp;lt;ffffffffc067e425&amp;gt;] ? cfs_trace_unlock_tcd+0x35/0x90 [libcfs]
[50297.623828]  [&amp;lt;ffffffffc0a7b3c0&amp;gt;] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc]
[50297.625013]  [&amp;lt;ffffffffc0a81121&amp;gt;] ldlm_cli_cancel+0x161/0x650 [ptlrpc]
[50297.626050]  [&amp;lt;ffffffffc0d5bc0a&amp;gt;] ll_md_blocking_ast+0xea/0x2b0 [lustre]
[50297.627204]  [&amp;lt;ffffffffc0a8593d&amp;gt;] ldlm_handle_bl_callback+0xed/0x4e0 [ptlrpc]
[50297.627945]  [&amp;lt;ffffffffc0a86530&amp;gt;] ldlm_bl_thread_main+0x800/0xa40 [ptlrpc]
[50297.628653]  [&amp;lt;ffffffff854d7c40&amp;gt;] ? wake_up_state+0x20/0x20
[50297.629234]  [&amp;lt;ffffffffc0a85d30&amp;gt;] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc]
[50297.629962]  [&amp;lt;ffffffff854c2e81&amp;gt;] kthread+0xd1/0xe0
[50297.630458]  [&amp;lt;ffffffff854c2db0&amp;gt;] ? insert_kthread_work+0x40/0x40
[50297.631082]  [&amp;lt;ffffffff85b76c37&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[50297.631728]  [&amp;lt;ffffffff854c2db0&amp;gt;] ? insert_kthread_work+0x40/0x40
[50297.632319] Code: 4c 8b 26 74 0d f6 05 d3 7c e7 ff 20 0f 85 91 00 00 00 49 8b 44 24 40 49 83 c4 40 49 39 c4 48 8d 58 e8 74 33 0f 1f 80 00 00 00 00 &amp;lt;48&amp;gt; 8b 43 28 48 8b 40 68 48 85 c0 74 12 4c 89 f2 48 89 de 4c 89 
[50297.635385] RIP  [&amp;lt;ffffffffc082c368&amp;gt;] cl_object_flush+0x48/0x120 [obdclass]
[50297.636104]  RSP &amp;lt;ffff95155a92bb20&amp;gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Logs are at&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/a38f656e-dc59-11e9-b62b-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/a38f656e-dc59-11e9-b62b-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/f9fb09b6-dc46-11e9-a197-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/f9fb09b6-dc46-11e9-a197-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="255440" author="vsaveliev" created="Thu, 26 Sep 2019 16:23:19 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[50297.612680] RAX: 5a5a5a5a5a5a5a5a RBX: 5a5a5a5a5a5a5a42 RCX: 0000000000000000
[50297.613366] RDX: ffff95154c051bd8 RSI: 0000000000000246 RDI: 0000000000000246
[50297.614050] RBP: ffff95155a92bb40 R08: 0000000000000000 R09: 0000000180180017
[50297.614735] R10: 0000000000000001 R11: ffffe84bc1bdc2c0 R12: ffff95156f70b430
[50297.615430] R13: ffff951549f8ac68 R14: ffff95155f7ae6c0 R15: 0000000000000000
[50297.616120] FS:  0000000000000000(0000) GS:ffff95157fd00000(0000) knlGS:0000000000000000
[50297.616899] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[50297.617451] CR2: 00007fde9a099f34 CR3: 0000000078754000 CR4: 00000000000606e0
[50297.618146] Call Trace:
[50297.618436]  [&amp;lt;ffffffffc0cde4ec&amp;gt;] lov_flush_composite+0x17c/0x1d0 [lov]
[50297.619091]  [&amp;lt;ffffffffc0cdd2e2&amp;gt;] lov_object_flush+0x22/0x60 [lov]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;This is introduced by 707bab62f5 (&quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12296&quot; title=&quot;ll_dom_lock_cancel() should zero kms attribute&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12296&quot;&gt;&lt;del&gt;LU-12296&lt;/del&gt;&lt;/a&gt; llite: improve ll_dom_lock_cancel&quot;).&lt;/p&gt;

&lt;p&gt;cl_object_flush needs i/o initialized. Otherwise, it races with layout change.&lt;/p&gt;</comment>
                            <comment id="255441" author="gerrit" created="Thu, 26 Sep 2019 16:26:23 +0000"  >&lt;p&gt;Vladimir Saveliev (c17830@cray.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/36300&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36300&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12704&quot; title=&quot;racer test_1: Invalid layout: The component end must be aligned by the stripe size&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12704&quot;&gt;&lt;del&gt;LU-12704&lt;/del&gt;&lt;/a&gt; llite: init i/o for cl_object_flush&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: aa6e3dbaf3249ca26537b21565882aec42b2aa57&lt;/p&gt;</comment>
                            <comment id="255870" author="gerrit" created="Fri, 4 Oct 2019 03:44:38 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/36174/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36174/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12704&quot; title=&quot;racer test_1: Invalid layout: The component end must be aligned by the stripe size&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12704&quot;&gt;&lt;del&gt;LU-12704&lt;/del&gt;&lt;/a&gt; tests: component end must be multiple of stripesize&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 1cb7bdb883b0b19d944a4bee8403d0b5898a3998&lt;/p&gt;</comment>
                            <comment id="255885" author="gerrit" created="Fri, 4 Oct 2019 11:39:35 +0000"  >&lt;p&gt;Mike Pershin (mpershin@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/36368&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36368&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12704&quot; title=&quot;racer test_1: Invalid layout: The component end must be aligned by the stripe size&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12704&quot;&gt;&lt;del&gt;LU-12704&lt;/del&gt;&lt;/a&gt; lov: take lsm reference in lov_flush_composite&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 9c5d50763ac4ce36f48a05e968ad3c84ffcdbe96&lt;/p&gt;</comment>
                            <comment id="255886" author="tappro" created="Fri, 4 Oct 2019 11:43:22 +0000"  >&lt;p&gt;I was fixing DOM entry checking in lov_flush_composite() and thought that taking LSM reference should protect us from layout change. Could you check if patch above fixes that problem?&lt;/p&gt;</comment>
                            <comment id="255887" author="vsaveliev" created="Fri, 4 Oct 2019 12:02:57 +0000"  >&lt;blockquote&gt;&lt;p&gt;Could you check if patch above fixes that problem?&lt;/p&gt;&lt;/blockquote&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 2091.709375] BUG: unable to handle kernel NULL pointer dereference at           (null)
[ 2091.712428] IP: [&amp;lt;          (null)&amp;gt;]           (null)
...
[ 2091.736582] CPU: 0 PID: 20963 Comm: ldlm_bl_08 Kdump: loaded Tainted: G           OE  ------------   3.10.0-957.5.1.el7.x86_64 #1
...
[ 2091.773122] Call Trace:
[ 2091.775002]  [&amp;lt;ffffffffc0c9a1a2&amp;gt;] ? lov_object_flush+0x22/0x60 [lov]
[ 2091.777577]  [&amp;lt;ffffffffc093e193&amp;gt;] cl_object_flush+0x63/0x120 [obdclass]
[ 2091.780136]  [&amp;lt;ffffffffc0e1c408&amp;gt;] ll_lock_cancel_bits+0x9b8/0xc00 [lustre]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;According to crash dump (from Cray&apos;s test system), the BUG happened when lov_object_flush() tried to dispatch to lov_flush_composite(), so probably the change in&#160;lov_flush_composite() will not help.&lt;br/&gt;
 However, lov_object has LLT_EMPTY layout already:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; lov_object.lo_type,lo_lsm 0xffff8f46e1af42e0
  lo_type = LLT_EMPTY
  lo_lsm = 0x0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;That probably happened earlier when layout lock was canceled:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000080:00010000:0.0:1569576242.662719:0:21009:0:(namei.c:248:ll_lock_cancel_bits()) ### to cancel bits 0x19 ns: lustre-MDT0001-mdc-ffff8f4777b0e000 lock: ffff8f4765861d40/0x7909e24de166dec lrc: 3/0,0 mode: PR/PR res: [0x240000406:0x5278:0x0].0x0 bits 0x19/0x19 rrc: 3 type: IBT flags: 0x429400000000 nid: local remote: 0x96dffd219df32662 expref: -99 pid: 3158 timeout: 0 lvb_type: 0
00000080:00200000:0.0:1569576242.662727:0:21009:0:(vvp_object.c:140:vvp_conf_set()) [0x240000406:0x5278:0x0]: losing layout lock
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Then, on canceling the DoM lock, the ldlm_bl_08 thread was faced with an empty layout:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000080:00010000:0.0:1569576242.728877:0:20963:0:(namei.c:248:ll_lock_cancel_bits()) ### to cancel bits 0x40 ns: lustre-MDT0001-mdc-ffff8f4777b0e000 lock: ffff8f47663ee000/0x7909e24de166f19 lrc: 2/0,0 mode: PR/PR res: [0x240000406:0x5278:0x0].0x0 bits 0x48/0x40 rrc: 3 type: IBT flags: 0x460400000000 nid: local remote: 0x96dffd219df32c12 expref: -99 pid: 3288 timeout: 0 lvb_type: 3
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="255890" author="vsaveliev" created="Fri, 4 Oct 2019 12:18:02 +0000"  >&lt;p&gt;It seems that all&#160;elements of lov_dispatch[] need all members of struct lov_layout_operations initialized.&lt;/p&gt;

&lt;p&gt;So, how about defining lov_dispatch&lt;span class=&quot;error&quot;&gt;&amp;#91;LLT_EMPTY&amp;#93;&lt;/span&gt;.llo_flush and lov_dispatch&lt;span class=&quot;error&quot;&gt;&amp;#91;LLT_RELEASED&amp;#93;&lt;/span&gt;.llo_flush to a function like&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; lov_flush_empty()
{
   &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; 0;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Mike, would that work? &lt;/p&gt;</comment>
                            <comment id="255905" author="tappro" created="Fri, 4 Oct 2019 14:17:15 +0000"  >&lt;p&gt;Vladimir, yes, I think it is what we need to add&lt;/p&gt;

&lt;p&gt;P.S. I&apos;ve updated patch with that code&lt;/p&gt;</comment>
                            <comment id="255911" author="vsaveliev" created="Fri, 4 Oct 2019 15:00:08 +0000"  >&lt;p&gt;Mike, wouldn&apos;t it be better to have&#160; &lt;a href=&quot;https://review.whamcloud.com/36368&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36368&lt;/a&gt;&#160;on top of &lt;a href=&quot;https://review.whamcloud.com/#/c/36300&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/36300&lt;/a&gt;?&lt;/p&gt;</comment>
                            <comment id="255931" author="tappro" created="Fri, 4 Oct 2019 19:21:17 +0000"  >&lt;p&gt;Vladimir, do you think &lt;a href=&quot;https://review.whamcloud.com/#/c/36300&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/36300&lt;/a&gt; is still needed? I was thinking that we are safe from layout change while LSM is referenced and no need to initialize IO in llite.&lt;/p&gt;</comment>
                            <comment id="256610" author="adilger" created="Fri, 18 Oct 2019 01:25:48 +0000"  >&lt;p&gt;Mike, Oleg is still hitting crashes with &lt;a href=&quot;https://review.whamcloud.com/36300&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36300&lt;/a&gt; so it can&apos;t land as-is.  Does it make sense to rebase your patch to be directly on master so that it can land independently, or does it depend on 36300 in order to work properly?&lt;/p&gt;</comment>
                            <comment id="256611" author="pjones" created="Fri, 18 Oct 2019 01:28:59 +0000"  >&lt;p&gt;Moving to 2.14 until we can understand the crashes in Olegtest&lt;/p&gt;</comment>
                            <comment id="256637" author="vsaveliev" created="Fri, 18 Oct 2019 10:16:04 +0000"  >&lt;blockquote&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
+&#187;&#160;&#160;&#160;&#160;&#160;&#160;&#160;io-&amp;gt;ci_ignore_layout = 1;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;(defect) I think that ignore_layout shouldn&apos;t be used here as its ignoring layout locking in LOV, as mentioned in lov_io_init() it is used along with CIT_MISC from OSC usually, because OSC object pins layout already. In our case we want cl_object_flush() be protected from layout change so shouldn&apos;t set ci_ignore_layout&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Mike, yes, you are right, io-&amp;gt;ci_ignore_layout set to 1 leads to race between layout change and cl_io_init. Something like&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00020000:00040000:0.0:1571007482.482526:0:28488:0:(lov_io.c:318:lov_io_mirror_init()) ASSERTION( comp-&amp;gt;lo_preferred_mirror == 0 ) failed:
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;has been seen a few times.&lt;/p&gt;

&lt;p&gt;However, there is also a problem with io-&amp;gt;ci_ignore_layout set to 0.&lt;/p&gt;

&lt;p&gt;Namely, the lockup below has been observed:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[&amp;lt;ffffffffc096f065&amp;gt;] ldlm_completion_ast+0x4e5/0x860 [ptlrpc]
[&amp;lt;ffffffffc0970e2a&amp;gt;] ldlm_cli_enqueue_fini+0x63a/0xef0 [ptlrpc]
[&amp;lt;ffffffffc0973b71&amp;gt;] ldlm_cli_enqueue+0x451/0xa60 [ptlrpc]
[&amp;lt;ffffffffc0ba7730&amp;gt;] mdc_enqueue_base+0x330/0x1c40 [mdc]
[&amp;lt;ffffffffc0ba9a85&amp;gt;] mdc_intent_lock+0x135/0x560 [mdc]
[&amp;lt;ffffffffc0be6742&amp;gt;] lmv_intent_lock+0x402/0xa20 [lmv]
[&amp;lt;ffffffffc0c0eb1d&amp;gt;] ll_layout_intent+0x1dd/0x720 [lustre]
[&amp;lt;ffffffffc0c1fa6c&amp;gt;] ll_layout_refresh+0x30c/0x900 [lustre]
[&amp;lt;ffffffffc0c62ea7&amp;gt;] vvp_io_init+0x347/0x460 [lustre]
[&amp;lt;ffffffffc076bf4b&amp;gt;] cl_io_init0.isra.15+0x8b/0x160 [obdclass]
[&amp;lt;ffffffffc076c0e3&amp;gt;] cl_io_init+0x43/0x80 [obdclass]
[&amp;lt;ffffffffc0c41fe5&amp;gt;] ll_lock_cancel_bits+0x625/0xca0 [lustre]
[&amp;lt;ffffffffc0c42a5c&amp;gt;] ll_md_blocking_ast+0x24c/0x2b0 [lustre]
[&amp;lt;ffffffffc09626ba&amp;gt;] ldlm_cancel_callback+0x8a/0x330 [ptlrpc]
[&amp;lt;ffffffffc096e311&amp;gt;] ldlm_cli_cancel_local+0xd1/0x420 [ptlrpc]
[&amp;lt;ffffffffc097294a&amp;gt;] ldlm_cli_cancel_list_local+0xea/0x280 [ptlrpc]
[&amp;lt;ffffffffc0972c6b&amp;gt;] ldlm_cancel_resource_local+0x18b/0x2a0 [ptlrpc]
[&amp;lt;ffffffffc0ba02ac&amp;gt;] mdc_resource_get_unused_res+0x10c/0x250 [mdc]
[&amp;lt;ffffffffc0bb1057&amp;gt;] mdc_enqueue_send+0x557/0x710 [mdc]
[&amp;lt;ffffffffc0bb14b2&amp;gt;] mdc_lock_enqueue+0x2a2/0x6f2 [mdc]
[&amp;lt;ffffffffc07696d5&amp;gt;] cl_lock_enqueue+0x65/0x120 [obdclass]
[&amp;lt;ffffffffc0ab51e5&amp;gt;] lov_lock_enqueue+0x95/0x150 [lov]
[&amp;lt;ffffffffc07696d5&amp;gt;] cl_lock_enqueue+0x65/0x120 [obdclass]
[&amp;lt;ffffffffc0769c67&amp;gt;] cl_lock_request+0x67/0x1f0 [obdclass]
[&amp;lt;ffffffffc076d9cb&amp;gt;] cl_io_lock+0x2bb/0x3d0 [obdclass]
[&amp;lt;ffffffffc076dcfa&amp;gt;] cl_io_loop+0xba/0x1c0 [obdclass]
[&amp;lt;ffffffffc0c5981f&amp;gt;] cl_setattr_ost+0x25f/0x3d0 [lustre]
[&amp;lt;ffffffffc0c34b28&amp;gt;] ll_setattr_raw+0xcc8/0x1060 [lustre]
[&amp;lt;ffffffffc0c34f23&amp;gt;] ll_setattr+0x63/0xc0 [lustre]
[&amp;lt;ffffffffbc260524&amp;gt;] notify_change+0x2c4/0x420
[&amp;lt;ffffffffbc23f335&amp;gt;] do_truncate+0x75/0xc0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;mdc_enqueue_send() tries to do an early cancel; ll_lock_cancel_bits()&amp;#45;&amp;gt;ll_dom_lock_cancel() initializes cl_io-&amp;gt;ci_ignore_layout to 0, so vvp_io_init() calls ll_layout_refresh(), which takes lli&amp;#45;&amp;gt;lli_layout_mutex and sends an enqueue RPC. The server sends a blocking AST back to the client, so another ll_lock_cancel_bits() gets to run and gets stuck trying to lock the mutex lli-&amp;gt;lli_layout_mutex:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
&lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; ll_layout_refresh(struct inode *inode, __u32 *gen)
{
...
    mutex_lock(&amp;amp;lli-&amp;gt;lli_layout_mutex);
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[&amp;lt;ffffffffc0c1f94e&amp;gt;] ll_layout_refresh+0x1ee/0x900 [lustre]
[&amp;lt;ffffffffc0c62ea7&amp;gt;] vvp_io_init+0x347/0x460 [lustre]
[&amp;lt;ffffffffc076bf4b&amp;gt;] cl_io_init0.isra.15+0x8b/0x160 [obdclass]
[&amp;lt;ffffffffc076c0e3&amp;gt;] cl_io_init+0x43/0x80 [obdclass]
[&amp;lt;ffffffffc0c41fe5&amp;gt;] ll_lock_cancel_bits+0x625/0xca0 [lustre]
[&amp;lt;ffffffffc0c42a99&amp;gt;] ll_md_blocking_ast+0x289/0x2b0 [lustre]
[&amp;lt;ffffffffc0978bdd&amp;gt;] ldlm_handle_bl_callback+0xed/0x4e0 [ptlrpc]
[&amp;lt;ffffffffc09797d0&amp;gt;] ldlm_bl_thread_main+0x800/0xa40 [ptlrpc]
[&amp;lt;ffffffffbc0c1c71&amp;gt;] kthread+0xd1/0xe0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
</comment>
                            <comment id="256842" author="vsaveliev" created="Tue, 22 Oct 2019 15:19:12 +0000"  >&lt;blockquote&gt;&lt;p&gt;(defect) I think that ignore_layout shouldn&apos;t be used here as its ignoring layout locking in LOV, as mentioned in lov_io_init() it is used along with CIT_MISC from OSC usually, because OSC object pins layout already. In our case we want cl_object_flush() be protected from layout change so shouldn&apos;t set ci_ignore_layout&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Mike, it looks like we are in trouble with this patch as io-&amp;gt;ci_ignore_layout = 0; as well as io-&amp;gt;ci_ignore_layout = 1; leads to a problem. Any idea?&lt;/p&gt;</comment>
                            <comment id="258124" author="gerrit" created="Tue, 12 Nov 2019 04:07:15 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/36368/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36368/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12704&quot; title=&quot;racer test_1: Invalid layout: The component end must be aligned by the stripe size&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12704&quot;&gt;&lt;del&gt;LU-12704&lt;/del&gt;&lt;/a&gt; lov: check all entries in lov_flush_composite&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 44460570fd21a91002190c8a0620923125135b52&lt;/p&gt;</comment>
                            <comment id="258140" author="pjones" created="Tue, 12 Nov 2019 06:39:45 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="60503">LU-13928</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00lvr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>