<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:57:10 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
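For instance, assuming the standard JIRA XML issue view URL pattern (not verified for this instance), a restricted request might look like:
https://jira.whamcloud.com/si/jira.issueviews:issue-xml/LU-12963/LU-12963.xml?field=key&field=summary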
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12963] recovery-small test 110k hangs</title>
                <link>https://jira.whamcloud.com/browse/LU-12963</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;recovery-small test_110k hangs. It looks like these hangs started on 21 July 2019 with Lustre 2.12.56.17 and have only been seen on master/b2_13.&lt;/p&gt;

&lt;p&gt;Looking at the console logs for the hang at &lt;a href=&quot;https://testing.whamcloud.com/test_sets/b732f8c4-e37a-11e9-add9-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/b732f8c4-e37a-11e9-add9-52540065bddc&lt;/a&gt;, we see the following:&lt;/p&gt;

&lt;p&gt;client1 (vm11)&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[18823.080898] Lustre: DEBUG MARKER: == recovery-small test 110k: FID_QUERY failed during recovery ======================================== 15:24:43 (1569770683)
[18826.019012] LustreError: 11-0: lustre-MDT0001-mdc-ffff986e5f303800: operation mds_statfs to node 10.9.4.165@tcp failed: rc = -107
[18826.020320] LustreError: Skipped 4 previous similar messages
[22427.043134] sysrq: SysRq : Changing Loglevel
[22427.043724] sysrq: Loglevel set to 8
[22427.046389] sysrq: SysRq : Show State
[22427.046821]   task                        PC stack   pid father
[22427.047454] systemd         S    0     1      0 0x00000000
[22427.048036] Call Trace:
[22427.048363]  ? __schedule+0x254/0x840
[22427.048753]  schedule+0x28/0x70
[22427.049116]  schedule_hrtimeout_range_clock+0x18d/0x1a0
[22427.049667]  ? ep_scan_ready_list.constprop.22+0x20c/0x230
[22427.050253]  ep_poll+0x3c8/0x410
[22427.050622]  ? wake_up_q+0x70/0x70
[22427.050994]  do_epoll_wait+0xb0/0xd0
[22427.051394]  __x64_sys_epoll_wait+0x1a/0x20
[22427.051842]  do_syscall_64+0x5b/0x1b0
[22427.052260]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[22427.052789] RIP: 0033:0x7f49dea94d97
[22427.053197] Code: Bad RIP value.
[22427.053545] RSP: 002b:00007fffe0967b50 EFLAGS: 00000293 ORIG_RAX: 00000000000000e8
[22427.054327] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f49dea94d97
[22427.055061] RDX: 000000000000005c RSI: 00007fffe0967b90 RDI: 0000000000000004
[22427.055777] RBP: 00007fffe0967b90 R08: 0000000000000000 R09: 7465677261742e79
[22427.056507] R10: 00000000ffffffff R11: 0000000000000293 R12: 000000000000005c
[22427.057238] R13: 00000000ffffffff R14: 00007fffe0967b90 R15: 0000000000000001
[22427.057954] kthreadd        S    0     2      0 0x80000000
[22427.058521] Call Trace:
[22427.058804]  ? __schedule+0x254/0x840
[22427.059207]  schedule+0x28/0x70
[22427.059550]  kthreadd+0x2e4/0x300
[22427.059912]  ? kthread_create_on_cpu+0xa0/0xa0
[22427.060397]  ret_from_fork+0x35/0x40
[22427.060794] rcu_gp          I    0     3      2 0x80000000
[22427.061372] Workqueue:            (null) (rcu_gp)
[22427.061855] Call Trace:
[22427.062147]  ? __schedule+0x254/0x840
[22427.062544]  schedule+0x28/0x70
[22427.062886]  rescuer_thread+0x2ee/0x340
[22427.063319]  ? flush_rcu_work+0x30/0x30
[22427.063721]  kthread+0x112/0x130
[22427.064084]  ? kthread_bind+0x30/0x30
[22427.064476]  ret_from_fork+0x35/0x40
[22427.064854] rcu_par_gp      I    0     4      2 0x80000000
[22427.065429] Call Trace:
[22427.065708]  ? __schedule+0x254/0x840
[22427.066113]  schedule+0x28/0x70
[22427.066455]  rescuer_thread+0x2ee/0x340
[22427.066855]  ? flush_rcu_work+0x30/0x30
[22427.067282]  kthread+0x112/0x130
[22427.067637]  ? kthread_bind+0x30/0x30
[22427.068038]  ret_from_fork+0x35/0x40
&#8230;
[22428.500219] kworker/1:1     D    0 16467      2 0x80000080
[22428.500791] Workqueue: events key_garbage_collector
[22428.501318] Call Trace:
[22428.501596]  ? __schedule+0x254/0x840
[22428.501993]  schedule+0x28/0x70
[22428.502343]  schedule_timeout+0x26d/0x390
[22428.502766]  ? check_preempt_curr+0x62/0x90
[22428.503222]  wait_for_completion+0x11f/0x190
[22428.503667]  ? wake_up_q+0x70/0x70
[22428.504050]  __wait_rcu_gp+0x12c/0x160
[22428.504453]  synchronize_rcu+0x76/0x80
[22428.504853]  ? __call_rcu+0x280/0x280
[22428.505260]  ? __bpf_trace_rcu_utilization+0x10/0x10
[22428.505773]  key_garbage_collector+0x1d7/0x3e0
[22428.506268]  process_one_work+0x1a7/0x360
[22428.506690]  worker_thread+0x30/0x390
[22428.507102]  ? pwq_unbound_release_workfn+0xd0/0xd0
[22428.507608]  kthread+0x112/0x130
[22428.507958]  ? kthread_bind+0x30/0x30
[22428.508363]  ret_from_fork+0x35/0x40
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;client2 (vm12)&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[18827.149580] Lustre: DEBUG MARKER: == recovery-small test 110k: FID_QUERY failed during recovery ======================================== 15:24:43 (1569770683)
[22433.833049] sysrq: SysRq : Changing Loglevel
[22433.833608] sysrq: Loglevel set to 8
[22433.838786] sysrq: SysRq : Show State
[22433.839253]   task                        PC stack   pid father
[22433.839871] systemd         S    0     1      0 0x00000000
[22433.840457] Call Trace:
[22433.840780]  ? __schedule+0x254/0x840
[22433.841198]  schedule+0x28/0x70
[22433.841551]  schedule_hrtimeout_range_clock+0x18d/0x1a0
[22433.842134]  ? ep_scan_ready_list.constprop.22+0x20c/0x230
[22433.842706]  ep_poll+0x3c8/0x410
[22433.843101]  ? wake_up_q+0x70/0x70
[22433.843477]  do_epoll_wait+0xb0/0xd0
[22433.843864]  __x64_sys_epoll_wait+0x1a/0x20
[22433.844330]  do_syscall_64+0x5b/0x1b0
[22433.844735]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[22433.845288] RIP: 0033:0x7fd7332c2d97
[22433.845678] Code: Bad RIP value.
[22433.846045] RSP: 002b:00007ffdb0a7b080 EFLAGS: 00000293 ORIG_RAX: 00000000000000e8
[22433.846810] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fd7332c2d97
[22433.847548] RDX: 0000000000000059 RSI: 00007ffdb0a7b0c0 RDI: 0000000000000004
[22433.848290] RBP: 00007ffdb0a7b0c0 R08: 0000000000000000 R09: 7465677261742e79
[22433.849032] R10: 00000000ffffffff R11: 0000000000000293 R12: 0000000000000059
[22433.849755] R13: 00000000ffffffff R14: 00007ffdb0a7b0c0 R15: 0000000000000001
[22433.850497] kthreadd        S    0     2      0 0x80000000
[22433.851081] Call Trace:
[22433.851364]  ? __schedule+0x254/0x840
[22433.851756]  schedule+0x28/0x70
[22433.852118]  kthreadd+0x2e4/0x300
[22433.852494]  ? kthread_create_on_cpu+0xa0/0xa0
[22433.852965]  ret_from_fork+0x35/0x40
[22433.853384] rcu_gp          I    0     3      2 0x80000000
[22433.853959] Call Trace:
[22433.854259]  ? __schedule+0x254/0x840
[22433.854655]  schedule+0x28/0x70
[22433.855020]  rescuer_thread+0x2ee/0x340
[22433.855443]  ? flush_rcu_work+0x30/0x30
[22433.855856]  kthread+0x112/0x130
[22433.856239]  ? kthread_bind+0x30/0x30
[22433.856635]  ret_from_fork+0x35/0x40
&#8230;
[ 8826.737348] kworker/1:1     D    0  5439      2 0x80000080
[ 8826.737911] Workqueue: events key_garbage_collector
[ 8826.738582] Call Trace:
[ 8826.738989]  ? __schedule+0x254/0x840
[ 8826.739599]  ? pcpu_next_unpop+0x37/0x50
[ 8826.740216]  schedule+0x28/0x70
[ 8826.740713]  schedule_timeout+0x26d/0x390
[ 8826.741367]  ? _cond_resched+0x15/0x30
[ 8826.741859]  ? cpumask_next+0x17/0x20
[ 8826.742372]  ? kmem_cache_open+0x3f0/0x430
[ 8826.742804]  wait_for_completion+0x11f/0x190
[ 8826.743451]  ? wake_up_q+0x70/0x70
[ 8826.743999]  __wait_rcu_gp+0x12c/0x160
[ 8826.744654]  synchronize_rcu+0x76/0x80
[ 8826.745261]  ? __call_rcu+0x280/0x280
[ 8826.745857]  ? __bpf_trace_rcu_utilization+0x10/0x10
[ 8826.746413]  key_garbage_collector+0x1d7/0x3e0
[ 8826.747031]  process_one_work+0x1a7/0x360
[ 8826.747687]  worker_thread+0x1fa/0x390
[ 8826.748294]  ? pwq_unbound_release_workfn+0xd0/0xd0
[ 8826.748839]  kthread+0x112/0x130
[ 8826.749434]  ? kthread_bind+0x30/0x30
[ 8826.750218]  ret_from_fork+0x35/0x40
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;MDS1,3 (vm6)&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[18819.270132] Lustre: DEBUG MARKER: == recovery-small test 110k: FID_QUERY failed during recovery ======================================== 15:24:43 (1569770683)
[18822.297574] LustreError: 11-0: lustre-MDT0001-osp-MDT0002: operation mds_statfs to node 10.9.4.165@tcp failed: rc = -107
[18822.299461] LustreError: Skipped 6 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;MDS2,4 (vm7)&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[18824.196956] Lustre: DEBUG MARKER: == recovery-small test 110k: FID_QUERY failed during recovery ======================================== 15:24:43 (1569770683)
[18824.510121] Lustre: DEBUG MARKER: grep -c /mnt/lustre-mds2&apos; &apos; /proc/mounts || true
[18825.124512] Lustre: DEBUG MARKER: umount -d /mnt/lustre-mds2
[18825.423041] Lustre: Failing over lustre-MDT0001
[18825.423922] Lustre: Skipped 7 previous similar messages
[18825.657887] Lustre: server umount lustre-MDT0001 complete
[18825.658935] Lustre: Skipped 7 previous similar messages
[18826.212991] Lustre: DEBUG MARKER: lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp;
[18826.212991] lctl dl | grep &apos; ST &apos; || true
[18826.797370] Lustre: DEBUG MARKER: modprobe dm-flakey;
[18826.797370] 			 dmsetup targets | grep -q flakey
[18826.868811] LustreError: 137-5: lustre-MDT0001_UUID: not available for connect from 10.9.4.165@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[18826.871737] LustreError: Skipped 187 previous similar messages
[18903.428904] LustreError: 137-5: lustre-MDT0001_UUID: not available for connect from 10.9.4.165@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[18903.432265] LustreError: Skipped 197 previous similar messages
[19053.669128] LustreError: 137-5: lustre-MDT0001_UUID: not available for connect from 10.9.4.165@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[19053.672181] LustreError: Skipped 387 previous similar messages
[19354.148751] LustreError: 137-5: lustre-MDT0001_UUID: not available for connect from 10.9.4.165@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[19354.152173] LustreError: Skipped 777 previous similar messages
[19954.814825] LustreError: 137-5: lustre-MDT0001_UUID: not available for connect from 10.9.4.118@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[19954.817893] LustreError: Skipped 1553 previous similar messages
[20556.069346] LustreError: 137-5: lustre-MDT0001_UUID: not available for connect from 10.9.4.165@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[20556.072539] LustreError: Skipped 1555 previous similar messages
[21157.029130] LustreError: 137-5: lustre-MDT0001_UUID: not available for connect from 10.9.4.165@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[21157.032971] LustreError: Skipped 1552 previous similar messages
[21757.094270] LustreError: 137-5: lustre-MDT0001_UUID: not available for connect from 10.9.4.118@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[21757.097259] LustreError: Skipped 1553 previous similar messages
[22358.949018] LustreError: 137-5: lustre-MDT0001_UUID: not available for connect from 10.9.4.165@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[22358.952110] LustreError: Skipped 1555 previous similar messages
[22443.966949] SysRq : Changing Loglevel
[22443.967688] Loglevel set to 8
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We have several instances of this hang:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/74cea80c-03e1-11ea-b934-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/74cea80c-03e1-11ea-b934-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/304f5830-03f9-11ea-a9d7-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/304f5830-03f9-11ea-a9d7-52540065bddc&lt;/a&gt;&lt;/p&gt;</description>
                <environment></environment>
        <key id="57371">LU-12963</key>
            <summary>recovery-small test 110k hangs</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="jamesanunez">James Nunez</reporter>
                        <labels>
                    </labels>
                <created>Tue, 12 Nov 2019 20:06:46 +0000</created>
                <updated>Tue, 12 Nov 2019 21:33:43 +0000</updated>
                                            <version>Lustre 2.13.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>2</watches>
                    <comments>
                            <comment id="258187" author="bzzz" created="Tue, 12 Nov 2019 20:09:03 +0000"  >&lt;p&gt;a dup of just landed &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12809&quot; class=&quot;external-link&quot; rel=&quot;nofollow&quot;&gt;https://jira.whamcloud.com/browse/LU-12809&lt;/a&gt; ?&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="57007">LU-12809</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                    <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00pdj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>