<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:25:13 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16235] cdt_agent_record_hash_add() ASSERTION( carl0-&gt;carl_cat_idx == carl1-&gt;carl_cat_idx ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-16235</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When adding new HSM actions llog records in mdt_agent_record_add(), cdt_state might be in CDT_INIT, so the HSM actions llog may not have been fully processed to set cdt_last_cookie to an appropriately large value, leading to cookie values being reused and triggering the assertions in cdt_agent_record_hash_add(). &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13689&quot; title=&quot;Replace cdt_state_lock with cdt_llog_lock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13689&quot;&gt;LU-13689&lt;/a&gt; attempted to fix this, but there might be a simpler solution.&lt;/p&gt;</description>
                <environment></environment>
        <key id="72775">LU-16235</key>
            <summary>cdt_agent_record_hash_add() ASSERTION( carl0-&gt;carl_cat_idx == carl1-&gt;carl_cat_idx ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="3" iconUrl="https://jira.whamcloud.com/images/icons/statuses/inprogress.png" description="This issue is being actively worked on at the moment by the assignee.">In Progress</status>
                    <statusCategory id="4" key="indeterminate" colorName="inprogress"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="nangelinas">Nikitas Angelinas</assignee>
                                    <reporter username="nangelinas">Nikitas Angelinas</reporter>
                        <labels>
                    </labels>
                <created>Wed, 12 Oct 2022 19:21:48 +0000</created>
                <updated>Fri, 12 Jan 2024 12:00:36 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="349427" author="gerrit" created="Wed, 12 Oct 2022 19:22:39 +0000"  >&lt;p&gt;&quot;Nikitas Angelinas &amp;lt;nikitas.angelinas@hpe.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/48842&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/48842&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16235&quot; title=&quot;cdt_agent_record_hash_add() ASSERTION( carl0-&amp;gt;carl_cat_idx == carl1-&amp;gt;carl_cat_idx ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16235&quot;&gt;LU-16235&lt;/a&gt; hsm: check cdt_state before adding actions llog record&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 10fd63dfe8cd712abc848a72719b44c8759f85e5&lt;/p&gt;</comment>
                            <comment id="381051" author="gerrit" created="Wed, 2 Aug 2023 10:06:53 +0000"  >&lt;p&gt;&quot;Etienne AUJAMES &amp;lt;eaujames@ddn.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/51850&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/51850&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16235&quot; title=&quot;cdt_agent_record_hash_add() ASSERTION( carl0-&amp;gt;carl_cat_idx == carl1-&amp;gt;carl_cat_idx ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16235&quot;&gt;LU-16235&lt;/a&gt; hsm: get a valid cookie for RAoLU request&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 923f1081659826a10d3a10c43ed60453e934954c&lt;/p&gt;</comment>
                            <comment id="384318" author="gerrit" created="Thu, 31 Aug 2023 06:26:34 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/48842/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/48842/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16235&quot; title=&quot;cdt_agent_record_hash_add() ASSERTION( carl0-&amp;gt;carl_cat_idx == carl1-&amp;gt;carl_cat_idx ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16235&quot;&gt;LU-16235&lt;/a&gt; hsm: check CDT state before adding actions llog&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: fe5706e0c19f96e4f821790004f05ab265002e9d&lt;/p&gt;</comment>
                            <comment id="384503" author="bzzz" created="Fri, 1 Sep 2023 05:39:15 +0000"  >&lt;p&gt;this patch causes a kernel page fault:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[ 9128.588763] Lustre: DEBUG MARKER: == conf-sanity test 132: hsm_actions processed after failover ========================================================== 05:32:30 (1693546350)
...
[ 9196.856259] Lustre: Found index 0 &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; lustre-MDT0000, updating log
[ 9197.308569] systemd[1]: mnt-lustre\x2dmds1.mount: Succeeded.
[ 9197.541121] Lustre: server umount lustre-MDT0000 complete
[ 9197.920157] BUG: unable to handle kernel paging request at ffff89866f0e2698
[ 9197.920511] PGD 76e01067 P4D 76e01067 PUD 176f48067 PMD 176dcf067 PTE 800ffffed0f1d060
[ 9197.920558] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
[ 9197.920586] CPU: 1 PID: 481978 Comm: hsm_cdtr Tainted: G        W  O     --------- -  - 4.18.0 #2
[ 9197.920636] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 9197.920690] RIP: 0010:mdt_coordinator+0xd7/0x1a10 [mdt]
[ 9197.920728] Code: ff 01 db 74 31 31 db eb 0c 8b 05 7c 39 58 ff 01 c0 39 c3 73 21 bf 00 ca 9a 3b 83 c3 01 e8 01 81 28 e7 48 89 c7 e8 59 8a 71 e7 &amp;lt;49&amp;gt; 8b 84 24 98 06 00 00 a8 01 74 d3 49 8b 84 24 98 06 00 00 a8 01
[ 9197.920826] RSP: 0000:ffff898681d17e00 EFLAGS: 00010282
[ 9197.920853] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
[ 9197.920891] RDX: 0000000000000000 RSI: ffffffffa8117897 RDI: 0000000000000246
[ 9197.920936] RBP: ffff8985a1c14740 R08: 0000000000000000 R09: ffff8986b13e98c0
[ 9197.920974] R10: 0000000000000000 R11: 000000000000004f R12: ffff89866f0e2000
[ 9197.921012] R13: ffff89868a508380 R14: ffffffffc0e8eb10 R15: ffff898648cbc000
[ 9197.921051] FS:  0000000000000000(0000) GS:ffff8986b1200000(0000) knlGS:0000000000000000
[ 9197.921089] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 9197.921120] CR2: ffff89866f0e2698 CR3: 0000000139335000 CR4: 00000000000006a0
[ 9197.921161] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 9197.921208] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 9197.921246] Call Trace:
[ 9197.921265]  ? _raw_spin_lock_irqsave+0x46/0x80
[ 9197.921302]  ? finish_task_switch+0x1f1/0x280
[ 9197.921350]  ? set_cdt_state+0x40/0x40 [mdt]
[ 9197.921386]  kthread+0x129/0x140
[ 9197.921415]  ? kthread_flush_work_fn+0x10/0x10
[ 9197.921451]  ret_from_fork+0x1f/0x30
[ 9197.921472] Modules linked in: lustre(O) ofd(O) osp(O) lod(O) ost(O) mdt(O) mdd(O) mgs(O) osd_ldiskfs(O) ldiskfs(O) lquota(O) lfsck(O) obdecho(O) mgc(O) mdc(O) lov(O) osc(O) lmv(O) fid(O) fld(O) ptlrpc(O) obdclass(O) ksocklnd(O) lnet(O) libcfs(O) zfs(O) zunicode(O) zzstd(O) zlua(O) zcommon(O) znvpair(O) zavl(O) icp(O) spl(O) [last unloaded: libcfs]
[ 9197.921647] CR2: ffff89866f0e2698
[ 9197.921668] ---[ end trace a7d48f6687796264 ]---
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;here is the backtrace:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
PID: 481978   TASK: ffff89868a508380  CPU: 1    COMMAND: &lt;span class=&quot;code-quote&quot;&gt;&quot;hsm_cdtr&quot;&lt;/span&gt;
 #0 [ffff898681d17c68] panic at ffffffffa80b9786
    /tmp/kernel/kernel/panic.c: 299
 #1 [ffff898681d17d00] no_context at ffffffffa80a9563
    /tmp/kernel/arch/x86/mm/fault.c: 799
 #2 [ffff898681d17d50] page_fault at ffffffffa8600f0e
    /tmp/kernel/arch/x86/entry/entry_64.S: 1220
    [exception RIP: mdt_coordinator+215]
    RIP: ffffffffc0e8ebe7  RSP: ffff898681d17e00  RFLAGS: 00010282
    RAX: 0000000000000000  RBX: 0000000000000001  RCX: 0000000000000000
    RDX: 0000000000000000  RSI: ffffffffa8117897  RDI: 0000000000000246
    RBP: ffff8985a1c14740   R8: 0000000000000000   R9: ffff8986b13e98c0
    R10: 0000000000000000  R11: 000000000000004f  R12: ffff89866f0e2000
    R13: ffff89868a508380  R14: ffffffffc0e8eb10  R15: ffff898648cbc000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
    /home/lustre/linux-4.18.0-305.25.1.el8_4/./arch/x86/include/asm/bitops.h: 324
 #3 [ffff898681d17f10] kthread at ffffffffa80d5199
    /tmp/kernel/kernel/kthread.c: 340
 #4 [ffff898681d17f50] ret_from_fork at ffffffffa860019f
    /tmp/kernel/arch/x86/entry/entry_64.S: 325
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The page fault was hit at:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
        &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; (!test_bit(MDT_FL_CFGLOG, &amp;amp;mdt-&amp;gt;mdt_state) &amp;amp;&amp;amp; i &amp;lt; obd_timeout) {
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="384506" author="bzzz" created="Fri, 1 Sep 2023 06:19:59 +0000"  >&lt;p&gt;this helped:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c
index 439e0cc130..90f2c270df 100644
--- a/lustre/mdt/mdt_coordinator.c
+++ b/lustre/mdt/mdt_coordinator.c
@@ -605,6 +605,7 @@ &lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; mdt_coordinator(void *data)
 
        cdt_start_pending_restore(mdt, cdt);
        set_cdt_state(cdt, CDT_RUNNING);
+       wake_up(&amp;amp;cdt-&amp;gt;cdt_waitq);
 
        &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; (1) {
                &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; i;
@@ -1227,6 +1228,7 @@ &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; mdt_hsm_cdt_stop(struct mdt_device *mdt)
        &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; rc;
 
        ENTRY;
+       wait_event(cdt-&amp;gt;cdt_waitq, cdt-&amp;gt;cdt_state != CDT_INIT);
        &lt;span class=&quot;code-comment&quot;&gt;/* stop coordinator thread */&lt;/span&gt;
        rc = set_cdt_state(cdt, CDT_STOPPING);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (rc == 0) {
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="384559" author="gerrit" created="Fri, 1 Sep 2023 15:46:03 +0000"  >&lt;p&gt;&quot;Etienne AUJAMES &amp;lt;eaujames@ddn.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/52222&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/52222&lt;/a&gt;&lt;br/&gt;
Subject: Revert &quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16235&quot; title=&quot;cdt_agent_record_hash_add() ASSERTION( carl0-&amp;gt;carl_cat_idx == carl1-&amp;gt;carl_cat_idx ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16235&quot;&gt;LU-16235&lt;/a&gt; hsm: check CDT state before adding actions llog&quot;&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: da1da220fe5176331145bc967d0b1a86f8f26433&lt;/p&gt;</comment>
                            <comment id="389660" author="nangelinas" created="Tue, 17 Oct 2023 21:03:03 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=eaujames&quot; class=&quot;user-hover&quot; rel=&quot;eaujames&quot;&gt;eaujames&lt;/a&gt;, could you please take a look at the questions about whether we need the revert patch in &lt;a href=&quot;https://review.whamcloud.com/#/c/fs/lustre-release/+/52222&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/fs/lustre-release/+/52222&lt;/a&gt;?&lt;/p&gt;</comment>
                            <comment id="389810" author="eaujames" created="Wed, 18 Oct 2023 16:39:46 +0000"  >&lt;p&gt;I have abandoned the revert and merged it into &lt;a href=&quot;https://review.whamcloud.com/51256&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/51256&lt;/a&gt; (&quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16356&quot; title=&quot;high contention on cdt_request_lock causes clients to hang&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16356&quot;&gt;LU-16356&lt;/a&gt; hsm: add running ref to the coordinator&quot;)&lt;/p&gt;</comment>
                            <comment id="399454" author="gerrit" created="Fri, 12 Jan 2024 12:00:36 +0000"  >&lt;p&gt;&quot;Etienne AUJAMES &amp;lt;eaujames@ddn.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/53660&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/53660&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16235&quot; title=&quot;cdt_agent_record_hash_add() ASSERTION( carl0-&amp;gt;carl_cat_idx == carl1-&amp;gt;carl_cat_idx ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16235&quot;&gt;LU-16235&lt;/a&gt; hsm: check CDT state before adding actions llog&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_15&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 2217ee7e5b18aee5fe0c9d6135194487afa38db5&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="59606">LU-13689</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="73448">LU-16356</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i032qv:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>