<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:44:35 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11519] sanity-hsm: test_90 MDS crash in mdt_cdt_waiting_cb()</title>
                <link>https://jira.whamcloud.com/browse/LU-11519</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Andreas Dilger  &amp;lt;adilger@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run review-dne-zfs-part-2:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/864ae762-d020-11e8-82f2-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/864ae762-d020-11e8-82f2-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;BUG: unable to handle kernel paging request at 00000000240cd204
 IP: mdt_cdt_waiting_cb.isra.25+0x357/0xc10 [mdt]
 CPU: 0 PID: 25301 Comm: hsm_cdtr Kdump: loaded Tainted: P
 RIP: 0010:[&amp;lt;ffffffffc1230617&amp;gt;]  [&amp;lt;ffffffffc1230617&amp;gt;] mdt_cdt_waiting_cb.isra.25+0x357/0xc10 [mdt]
Call Trace:
mdt_coordinator_cb+0x162/0x290 [mdt]
llog_process_thread+0x852/0x1550 [obdclass]
llog_process_or_fork+0xbc/0x450 [obdclass]
llog_cat_process_cb+0x239/0x250 [obdclass]
llog_process_thread+0x852/0x1550 [obdclass]
llog_process_or_fork+0xbc/0x450 [obdclass]
llog_cat_process_or_fork+0x199/0x2a0 [obdclass]
llog_cat_process+0x2e/0x30 [obdclass]
cdt_llog_process+0xc6/0x3a0 [mdt]
mdt_coordinator+0x541/0x19f0 [mdt]
kthread+0xd1/0xe0
ret_from_fork_nospec_begin+0x21/0x21
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="53601">LU-11519</key>
            <summary>sanity-hsm: test_90 MDS crash in mdt_cdt_waiting_cb()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="jhammond">John Hammond</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Mon, 15 Oct 2018 14:35:12 +0000</created>
                <updated>Wed, 19 Dec 2018 21:21:16 +0000</updated>
                            <resolved>Tue, 27 Nov 2018 05:12:42 +0000</resolved>
                                    <version>Lustre 2.12.0</version>
                                    <fixVersion>Lustre 2.12.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>10</watches>
                                                                            <comments>
                            <comment id="235159" author="adilger" created="Fri, 19 Oct 2018 16:24:01 +0000"  >&lt;p&gt;+1 in review-dne-part-2 test:&lt;br/&gt;
 &lt;a href=&quot;https://testing.whamcloud.com/test_sets/32eb66fe-d3a5-11e8-ad90-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/32eb66fe-d3a5-11e8-ad90-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="235171" author="jamesanunez" created="Fri, 19 Oct 2018 21:58:28 +0000"  >&lt;p&gt;We are seeing sanity-hsm test 13 crash at about a 5% failure rate with this call trace. All are for DNE with ZFS configurations.&lt;/p&gt;

&lt;p&gt;Some examples are at&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/b61b466e-d34d-11e8-9238-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/b61b466e-d34d-11e8-9238-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/d47f7fc4-d313-11e8-82f2-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/d47f7fc4-d313-11e8-82f2-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="235245" author="adilger" created="Tue, 23 Oct 2018 01:59:12 +0000"  >&lt;p&gt;+1 on master: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/23d994e6-d644-11e8-ad90-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/23d994e6-d644-11e8-ad90-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="235466" author="ys" created="Thu, 25 Oct 2018 04:50:20 +0000"  >&lt;p&gt;The crash dump cannot be loaded for analysis. &lt;/p&gt;</comment>
                            <comment id="235818" author="adilger" created="Mon, 29 Oct 2018 17:26:02 +0000"  >&lt;p&gt;+1 on master &lt;a href=&quot;https://testing.whamcloud.com/test_sets/30e877d2-d6d1-11e8-9238-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/30e877d2-d6d1-11e8-9238-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;There are actually many failures like this. &lt;/p&gt;</comment>
                            <comment id="236169" author="adilger" created="Thu, 1 Nov 2018 22:01:46 +0000"  >&lt;p&gt;John, could you please take a look at this? It looks like a coordinator bug and has been crashing the MDS fairly commonly lately.&lt;/p&gt;</comment>
                            <comment id="236250" author="jhammond" created="Fri, 2 Nov 2018 19:09:20 +0000"  >&lt;p&gt;I grabbed the most recent failure from maloo &lt;a href=&quot;https://testing.whamcloud.com/test_sets/27781c7e-dde4-11e8-975a-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/27781c7e-dde4-11e8-975a-52540065bddc&lt;/a&gt; which uses build 59629:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 5043.117404] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
[ 5043.119825] IP: [&amp;lt;ffffffffc1327747&amp;gt;] mdt_cdt_waiting_cb.isra.25+0x357/0xc10 [mdt]
[ 5043.122276] PGD 0

$ mkdir /tmp/LU-11519
$ cd /tmp/LU-11519
$ wget &apos;https://build.whamcloud.com/job/lustre-reviews/59629/arch=x86_64,build_type=server,distro=el7,ib_stack=inkernel/artifact/artifacts/RPMS/x86_64/lustre-debuginfo-2.11.56_56_g098be7d-1.el7.x86_64.rpm&apos;
...
$ rpm2cpio lustre-debuginfo-2.11.56_56_g098be7d-1.el7.x86_64.rpm | cpio -id
$ nm ./usr/lib/debug/lib/modules/3.10.0-862.14.4.el7_lustre.x86_64/extra/lustre/fs/mdt.ko.debug | awk &apos;$3 == &quot;mdt_cdt_waiting_cb.isra.25&quot; { print $1; }&apos;
000000000005f3f0
$ printf &apos;%#lx\n&apos; $(( 0x357 + 0x5f3f0 ))
0x5f747
$ addr2line -e ./usr/lib/debug/lib/modules/3.10.0-862.14.4.el7_lustre.x86_64/extra/lustre/fs/mdt.ko.debug --functions --inlines 0x5f747
mdt_cdt_waiting_cb
/usr/src/debug/lustre-2.11.56_56_g098be7d/lustre/mdt/mdt_coordinator.c:228
$ cat -n ./usr/src/debug/lustre-2.11.56_56_g098be7d/lustre/mdt/mdt_coordinator.c | sed -n &apos;220,230p&apos;
   220				} while (request-&amp;gt;hal_used_sz + hai_size &amp;gt;
   221					 LDLM_MAXREQSIZE);
   222			} else if (hsd-&amp;gt;hsd_housekeeping) {
   223				struct hsm_scan_request *tmp;
   224	
   225				/* Discard the (whole) last hal */
   226				hsd-&amp;gt;hsd_request_count--;
   227				tmp = &amp;amp;hsd-&amp;gt;hsd_request[hsd-&amp;gt;hsd_request_count];
   228				hsd-&amp;gt;hsd_action_count -= tmp-&amp;gt;hal-&amp;gt;hal_count;
   229				OBD_FREE(tmp-&amp;gt;hal, tmp-&amp;gt;hal_sz);
   230			} else {
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;So it looks like tmp-&amp;gt;hal is NULL.&lt;/p&gt;</comment>
                            <comment id="236260" author="adilger" created="Fri, 2 Nov 2018 20:25:19 +0000"  >&lt;blockquote&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;$ nm ...
$ printf ..
$ addr2line -e ...
$ cat -n ...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;&lt;/blockquote&gt;

&lt;p&gt;I typically use the following, which is considerably easier:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;$ gdb .../mdt.ko.debug -ex &quot;list *(mdt_cdt_waiting_cb+0x357)&quot; -ex quit
(gdb) list *(mdt_cdt_waiting_cb+0x357)
0x5f777 is in mdt_cdt_waiting_cb (/usr/src/lustre-head/lustre/mdt/mdt_coordinator.c:228).
223                             struct hsm_scan_request *tmp;
224     
225                             /* Discard the (whole) last hal */
226                             hsd-&amp;gt;hsd_request_count--;
227                             tmp = &amp;amp;hsd-&amp;gt;hsd_request[hsd-&amp;gt;hsd_request_count];
228                             hsd-&amp;gt;hsd_action_count -= tmp-&amp;gt;hal-&amp;gt;hal_count;
229                             OBD_FREE(tmp-&amp;gt;hal, tmp-&amp;gt;hal_sz);
230                     } else {
231                             /* Bailing out, this code path is too hot */
232                             RETURN(LLOG_PROC_BREAK);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Note the &quot;&lt;tt&gt;list *()&lt;/tt&gt;&quot; argument is &lt;em&gt;typically&lt;/em&gt; the same format as how addresses are printed from stack dumps/oops messages, but in this case the &quot;&lt;tt&gt;.isra.25&lt;/tt&gt;&quot; part needs to be dropped.&lt;/p&gt;</comment>
                            <comment id="236280" author="yujian" created="Sat, 3 Nov 2018 16:59:02 +0000"  >&lt;p&gt;sanity-hsm test 13 also hit the same failure in review-dne-zfs-part-2 test session on master branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/ee81ca36-defc-11e8-a251-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/ee81ca36-defc-11e8-a251-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="236292" author="yujian" created="Sun, 4 Nov 2018 07:21:41 +0000"  >&lt;p&gt;sanity-hsm test 13 hit this failure again:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/b6a66416-dfe5-11e8-89f8-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/b6a66416-dfe5-11e8-89f8-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="236327" author="jhammond" created="Mon, 5 Nov 2018 16:08:35 +0000"  >&lt;p&gt;&amp;gt; I typically use the following, which is considerably easier:&lt;/p&gt;

&lt;p&gt;(I like my way because it makes inlining much easier to understand, which is not an issue here. Maybe gcc has a better way to do this in those cases.)&lt;/p&gt;</comment>
                            <comment id="236333" author="gerrit" created="Mon, 5 Nov 2018 17:51:17 +0000"  >&lt;p&gt;John L. Hammond (jhammond@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/33580&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/33580&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11519&quot; title=&quot;sanity-hsm: test_90 MDS crash in mdt_cdt_waiting_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11519&quot;&gt;&lt;del&gt;LU-11519&lt;/del&gt;&lt;/a&gt; hsm: handle hsd_request_count == 0 properly&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 5021ced380ad820a5f92ac1b41363f5919d79e16&lt;/p&gt;</comment>
                            <comment id="236419" author="gerrit" created="Tue, 6 Nov 2018 13:15:36 +0000"  >&lt;p&gt;Quentin Bouget (quentin.bouget@cea.fr) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/33590&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/33590&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11519&quot; title=&quot;sanity-hsm: test_90 MDS crash in mdt_cdt_waiting_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11519&quot;&gt;&lt;del&gt;LU-11519&lt;/del&gt;&lt;/a&gt; hsm: improve the testing of hsm.max_requests&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: f56c3963b1314e641044464b50e8a549762c751e&lt;/p&gt;</comment>
                            <comment id="237308" author="gerrit" created="Wed, 21 Nov 2018 04:06:12 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/33580/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/33580/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11519&quot; title=&quot;sanity-hsm: test_90 MDS crash in mdt_cdt_waiting_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11519&quot;&gt;&lt;del&gt;LU-11519&lt;/del&gt;&lt;/a&gt; hsm: handle hsd_request_count == 0 properly&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: af05afdfb5781806ab3bc059c86c289b01713ade&lt;/p&gt;</comment>
                            <comment id="237501" author="gerrit" created="Tue, 27 Nov 2018 04:57:30 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/33590/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/33590/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11519&quot; title=&quot;sanity-hsm: test_90 MDS crash in mdt_cdt_waiting_cb()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11519&quot;&gt;&lt;del&gt;LU-11519&lt;/del&gt;&lt;/a&gt; hsm: improve the testing of hsm.max_requests&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: c1df28b34acdb56acff444400fc9a05d8adf493a&lt;/p&gt;</comment>
                            <comment id="237510" author="pjones" created="Tue, 27 Nov 2018 05:12:42 +0000"  >&lt;p&gt;Landed for 2.12&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="53940">LU-11630</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i0047j:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>