<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:46:13 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
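For instance, an illustrative URL using JIRA's standard XML issue view, restricted to those two fields:
https://jira.whamcloud.com/si/jira.issueviews:issue-xml/LU-11704/LU-11704.xml?field=key&field=summary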
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11704] sanity-lfsck test 2a hangs with &#8216;Pool &apos;lustre-mdt1&apos; has encountered an uncorrectable I/O failure and has been suspended.&#8217;</title>
                <link>https://jira.whamcloud.com/browse/LU-11704</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;sanity-lfsck test_2a hangs for 2.10.6 RC2; logs at &lt;a href=&quot;https://testing.whamcloud.com/test_sets/4b667296-ee58-11e8-b67f-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/4b667296-ee58-11e8-b67f-52540065bddc&lt;/a&gt;. The last thing seen in the client (vm5) test_log is mounting an OST.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;CMD: trevis-37vm7 zfs get -H -o value 						lustre:svname lustre-ost1/ost1
Starting ost1:   lustre-ost1/ost1 /mnt/lustre-ost1
CMD: trevis-37vm7 mkdir -p /mnt/lustre-ost1; mount -t lustre   		                   lustre-ost1/ost1 /mnt/lustre-ost1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
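&lt;p&gt;For reference, rerunning just this subtest with the standard lustre/tests harness (assuming an already-configured test cluster) looks something like:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# from the lustre/tests directory on the test-driver node
ONLY=2a ./sanity-lfsck.sh
# or via the auster wrapper
./auster -v sanity-lfsck --only 2a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;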

&lt;p&gt;In the OSS (vm7) console log, we see many errors&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[19184.117195] Lustre: DEBUG MARKER: mkdir -p /mnt/lustre-ost1; mount -t lustre   		                   lustre-ost1/ost1 /mnt/lustre-ost1
[19214.033512] LustreError: 137-5: lustre-OST0001_UUID: not available for connect from 10.9.5.208@tcp (no target). If you are running an HA pair check that the target is mounted on the other server.
[19214.036476] LustreError: Skipped 24 previous similar messages
[19265.130894] WARNING: MMP writes to pool &apos;lustre-ost1&apos; have not succeeded in over 20s; suspending pool
[19265.132534] WARNING: Pool &apos;lustre-ost1&apos; has encountered an uncorrectable I/O failure and has been suspended.

[19265.193259] WARNING: MMP writes to pool &apos;lustre-ost7&apos; have not succeeded in over 20s; suspending pool
[19265.193297] WARNING: MMP writes to pool &apos;lustre-ost5&apos; have not succeeded in over 20s; suspending pool
[19265.193298] WARNING: Pool &apos;lustre-ost5&apos; has encountered an uncorrectable I/O failure and has been suspended.

[19265.193320] WARNING: MMP writes to pool &apos;lustre-ost6&apos; have not succeeded in over 20s; suspending pool
[19265.193320] WARNING: Pool &apos;lustre-ost6&apos; has encountered an uncorrectable I/O failure and has been suspended.

[19265.201515] WARNING: Pool &apos;lustre-ost7&apos; has encountered an uncorrectable I/O failure and has been suspended.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
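&lt;p&gt;These warnings come from ZFS multihost (MMP) protection: each imported pool must complete periodic heartbeat writes, and if those writes fail for too long the pool is suspended rather than risk a dual import corrupting it. A minimal sketch, assuming the stock ZFS 0.7.x multihost interface, of how one might inspect the relevant settings and resume a suspended pool once its device recovers:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# is MMP enabled on the pool?
zpool get multihost lustre-ost1

# heartbeat interval (ms) and tolerated failed intervals
cat /sys/module/zfs/parameters/zfs_multihost_interval
cat /sys/module/zfs/parameters/zfs_multihost_fail_intervals

# after the underlying device is healthy again, clear the
# suspension so the pool resumes I/O
zpool clear lustre-ost1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;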

&lt;p&gt;In the MDS (vm8) console log, we see errors, call traces for blocked processes, and buffer I/O errors&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[19181.922927] Lustre: DEBUG MARKER: lctl set_param -n mdt.lustre*.enable_remote_dir=1
[19203.638907] WARNING: MMP writes to pool &apos;lustre-mdt1&apos; have not succeeded in over 20s; suspending pool
[19203.640572] WARNING: Pool &apos;lustre-mdt1&apos; has encountered an uncorrectable I/O failure and has been suspended.

[19273.862749] LustreError: 11-0: lustre-OST0003-osc-MDT0000: operation ost_connect to node 10.9.5.207@tcp failed: rc = -19
[19320.390856] INFO: task jbd2/vda1-8:264 blocked for more than 120 seconds.
[19320.392208] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
[19320.393530] jbd2/vda1-8     D ffff9ed240013f40     0   264      2 0x00000000
[19320.394880] Call Trace:
[19320.395379]  [&amp;lt;ffffffffb3329774&amp;gt;] ? blk_mq_run_hw_queue+0x14/0x20
[19320.396412]  [&amp;lt;ffffffffb3330914&amp;gt;] ? blk_mq_sched_insert_requests+0x64/0x80
[19320.397586]  [&amp;lt;ffffffffb3716ec0&amp;gt;] ? bit_wait+0x50/0x50
[19320.398511]  [&amp;lt;ffffffffb3718f39&amp;gt;] schedule+0x29/0x70
[19320.399374]  [&amp;lt;ffffffffb37168a9&amp;gt;] schedule_timeout+0x239/0x2c0
[19320.400389]  [&amp;lt;ffffffffb306a14e&amp;gt;] ? kvm_clock_get_cycles+0x1e/0x20
[19320.401483]  [&amp;lt;ffffffffb30fa982&amp;gt;] ? ktime_get_ts64+0x52/0xf0
[19320.402442]  [&amp;lt;ffffffffb306a14e&amp;gt;] ? kvm_clock_get_cycles+0x1e/0x20
[19320.403477]  [&amp;lt;ffffffffb30fa982&amp;gt;] ? ktime_get_ts64+0x52/0xf0
[19320.404441]  [&amp;lt;ffffffffb3716ec0&amp;gt;] ? bit_wait+0x50/0x50
[19320.405317]  [&amp;lt;ffffffffb371844d&amp;gt;] io_schedule_timeout+0xad/0x130
[19320.406335]  [&amp;lt;ffffffffb37184e8&amp;gt;] io_schedule+0x18/0x20
[19320.407240]  [&amp;lt;ffffffffb3716ed1&amp;gt;] bit_wait_io+0x11/0x50
[19320.408143]  [&amp;lt;ffffffffb37169f7&amp;gt;] __wait_on_bit+0x67/0x90
[19320.409072]  [&amp;lt;ffffffffb31962a1&amp;gt;] wait_on_page_bit+0x81/0xa0
[19320.410054]  [&amp;lt;ffffffffb30befd0&amp;gt;] ? wake_bit_function+0x40/0x40
[19320.411063]  [&amp;lt;ffffffffb31963d1&amp;gt;] __filemap_fdatawait_range+0x111/0x190
[19320.412194]  [&amp;lt;ffffffffb3196464&amp;gt;] filemap_fdatawait_range+0x14/0x30
[19320.413258]  [&amp;lt;ffffffffb31964a7&amp;gt;] filemap_fdatawait+0x27/0x30
[19320.414288]  [&amp;lt;ffffffffc0375af1&amp;gt;] jbd2_journal_commit_transaction+0xa81/0x19b0 [jbd2]
[19320.415623]  [&amp;lt;ffffffffb302a59e&amp;gt;] ? __switch_to+0xce/0x580
[19320.416569]  [&amp;lt;ffffffffb30cc7f0&amp;gt;] ? finish_task_switch+0x50/0x170
[19320.417623]  [&amp;lt;ffffffffc037bab9&amp;gt;] kjournald2+0xc9/0x260 [jbd2]
[19320.418672]  [&amp;lt;ffffffffb30bef10&amp;gt;] ? wake_up_atomic_t+0x30/0x30
[19320.419683]  [&amp;lt;ffffffffc037b9f0&amp;gt;] ? commit_timeout+0x10/0x10 [jbd2]
[19320.420756]  [&amp;lt;ffffffffb30bdf21&amp;gt;] kthread+0xd1/0xe0
[19320.421606]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.422658]  [&amp;lt;ffffffffb37255f7&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[19320.423767]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.424806] INFO: task kworker/u4:2:22918 blocked for more than 120 seconds.
[19320.425988] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
[19320.427306] kworker/u4:2    D ffff9ed274490fd0     0 22918      2 0x00000080
[19320.428596] Workqueue: writeback bdi_writeback_workfn (flush-253:0)
[19320.429724] Call Trace:
[19320.430179]  [&amp;lt;ffffffffb3716ec0&amp;gt;] ? bit_wait+0x50/0x50
[19320.431054]  [&amp;lt;ffffffffb3718f39&amp;gt;] schedule+0x29/0x70
[19320.431914]  [&amp;lt;ffffffffb37168a9&amp;gt;] schedule_timeout+0x239/0x2c0
[19320.432929]  [&amp;lt;ffffffffb31fbd1e&amp;gt;] ? __kmalloc+0x2e/0x230
[19320.433853]  [&amp;lt;ffffffffc02b60a4&amp;gt;] ? virtqueue_add+0x1c4/0x4d0 [virtio_ring]
[19320.435081]  [&amp;lt;ffffffffb306a14e&amp;gt;] ? kvm_clock_get_cycles+0x1e/0x20
[19320.436139]  [&amp;lt;ffffffffb30fa982&amp;gt;] ? ktime_get_ts64+0x52/0xf0
[19320.437099]  [&amp;lt;ffffffffb3716ec0&amp;gt;] ? bit_wait+0x50/0x50
[19320.437983]  [&amp;lt;ffffffffb371844d&amp;gt;] io_schedule_timeout+0xad/0x130
[19320.439000]  [&amp;lt;ffffffffb37184e8&amp;gt;] io_schedule+0x18/0x20
[19320.439905]  [&amp;lt;ffffffffb3716ed1&amp;gt;] bit_wait_io+0x11/0x50
[19320.440788]  [&amp;lt;ffffffffb37169f7&amp;gt;] __wait_on_bit+0x67/0x90
[19320.441703]  [&amp;lt;ffffffffb3716ec0&amp;gt;] ? bit_wait+0x50/0x50
[19320.442586]  [&amp;lt;ffffffffb3716b61&amp;gt;] out_of_line_wait_on_bit+0x81/0xb0
[19320.443646]  [&amp;lt;ffffffffb30befd0&amp;gt;] ? wake_bit_function+0x40/0x40
[19320.444662]  [&amp;lt;ffffffffc0373ce5&amp;gt;] do_get_write_access+0x285/0x4d0 [jbd2]
[19320.445800]  [&amp;lt;ffffffffb325649d&amp;gt;] ? __getblk+0x2d/0x300
[19320.446745]  [&amp;lt;ffffffffc0373f57&amp;gt;] jbd2_journal_get_write_access+0x27/0x40 [jbd2]
[19320.448075]  [&amp;lt;ffffffffc03c7beb&amp;gt;] __ext4_journal_get_write_access+0x3b/0x80 [ext4]
[19320.449355]  [&amp;lt;ffffffffc0398f30&amp;gt;] ext4_reserve_inode_write+0x70/0xa0 [ext4]
[19320.450534]  [&amp;lt;ffffffffc039c850&amp;gt;] ? ext4_dirty_inode+0x40/0x60 [ext4]
[19320.451628]  [&amp;lt;ffffffffc0398fb3&amp;gt;] ext4_mark_inode_dirty+0x53/0x220 [ext4]
[19320.452779]  [&amp;lt;ffffffffc039c850&amp;gt;] ext4_dirty_inode+0x40/0x60 [ext4]
[19320.453841]  [&amp;lt;ffffffffb324d55d&amp;gt;] __mark_inode_dirty+0x16d/0x270
[19320.454868]  [&amp;lt;ffffffffc039588d&amp;gt;] ext4_da_update_reserve_space+0x13d/0x190 [ext4]
[19320.456142]  [&amp;lt;ffffffffc03c5994&amp;gt;] ext4_ext_map_blocks+0xaa4/0xf60 [ext4]
[19320.457277]  [&amp;lt;ffffffffc0395a75&amp;gt;] ext4_map_blocks+0x155/0x6e0 [ext4]
[19320.458369]  [&amp;lt;ffffffffc03995ac&amp;gt;] ? ext4_writepages+0x42c/0xd40 [ext4]
[19320.459476]  [&amp;lt;ffffffffc039984a&amp;gt;] ext4_writepages+0x6ca/0xd40 [ext4]
[19320.460558]  [&amp;lt;ffffffffb31a3b81&amp;gt;] do_writepages+0x21/0x50
[19320.461476]  [&amp;lt;ffffffffb324cfd0&amp;gt;] __writeback_single_inode+0x40/0x260
[19320.462559]  [&amp;lt;ffffffffb324da64&amp;gt;] writeback_sb_inodes+0x1c4/0x490
[19320.463584]  [&amp;lt;ffffffffb324ddcf&amp;gt;] __writeback_inodes_wb+0x9f/0xd0
[19320.464611]  [&amp;lt;ffffffffb324e603&amp;gt;] wb_writeback+0x263/0x2f0
[19320.465541]  [&amp;lt;ffffffffb323ab0c&amp;gt;] ? get_nr_inodes+0x4c/0x70
[19320.466486]  [&amp;lt;ffffffffb324ef8b&amp;gt;] bdi_writeback_workfn+0x2cb/0x460
[19320.467529]  [&amp;lt;ffffffffb30b613f&amp;gt;] process_one_work+0x17f/0x440
[19320.468516]  [&amp;lt;ffffffffb30b71d6&amp;gt;] worker_thread+0x126/0x3c0
[19320.469458]  [&amp;lt;ffffffffb30b70b0&amp;gt;] ? manage_workers.isra.24+0x2a0/0x2a0
[19320.470550]  [&amp;lt;ffffffffb30bdf21&amp;gt;] kthread+0xd1/0xe0
[19320.471384]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.472408]  [&amp;lt;ffffffffb37255f7&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[19320.473497]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.474524] INFO: task txg_sync:7615 blocked for more than 120 seconds.
[19320.475631] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
[19320.476931] txg_sync        D ffff9ed29c024f10     0  7615      2 0x00000080
[19320.478183] Call Trace:
[19320.478645]  [&amp;lt;ffffffffc0478fc7&amp;gt;] ? taskq_dispatch_ent+0x57/0x170 [spl]
[19320.479755]  [&amp;lt;ffffffffb3718f39&amp;gt;] schedule+0x29/0x70
[19320.480598]  [&amp;lt;ffffffffb37168a9&amp;gt;] schedule_timeout+0x239/0x2c0
[19320.481836]  [&amp;lt;ffffffffc0694e7f&amp;gt;] ? zio_taskq_dispatch+0x8f/0xa0 [zfs]
[19320.482943]  [&amp;lt;ffffffffb306a14e&amp;gt;] ? kvm_clock_get_cycles+0x1e/0x20
[19320.483985]  [&amp;lt;ffffffffb371844d&amp;gt;] io_schedule_timeout+0xad/0x130
[19320.485018]  [&amp;lt;ffffffffb30beac6&amp;gt;] ? prepare_to_wait_exclusive+0x56/0x90
[19320.486137]  [&amp;lt;ffffffffb37184e8&amp;gt;] io_schedule+0x18/0x20
[19320.487035]  [&amp;lt;ffffffffc047d192&amp;gt;] cv_wait_common+0xb2/0x150 [spl]
[19320.488073]  [&amp;lt;ffffffffb30bef10&amp;gt;] ? wake_up_atomic_t+0x30/0x30
[19320.489066]  [&amp;lt;ffffffffc047d268&amp;gt;] __cv_wait_io+0x18/0x20 [spl]
[19320.490126]  [&amp;lt;ffffffffc0699023&amp;gt;] zio_wait+0x113/0x1c0 [zfs]
[19320.491201]  [&amp;lt;ffffffffc060f40f&amp;gt;] dsl_pool_sync+0xbf/0x440 [zfs]
[19320.492246]  [&amp;lt;ffffffffc062d4b7&amp;gt;] spa_sync+0x437/0xd90 [zfs]
[19320.493211]  [&amp;lt;ffffffffb30d2022&amp;gt;] ? default_wake_function+0x12/0x20
[19320.494259]  [&amp;lt;ffffffffb30caba4&amp;gt;] ? __wake_up+0x44/0x50
[19320.495176]  [&amp;lt;ffffffffc0641c41&amp;gt;] txg_sync_thread+0x301/0x510 [zfs]
[19320.496257]  [&amp;lt;ffffffffc0641940&amp;gt;] ? txg_fini+0x2a0/0x2a0 [zfs]
[19320.497245]  [&amp;lt;ffffffffc0478013&amp;gt;] thread_generic_wrapper+0x73/0x80 [spl]
[19320.498374]  [&amp;lt;ffffffffc0477fa0&amp;gt;] ? __thread_exit+0x20/0x20 [spl]
[19320.499401]  [&amp;lt;ffffffffb30bdf21&amp;gt;] kthread+0xd1/0xe0
[19320.500253]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.501286]  [&amp;lt;ffffffffb37255f7&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[19320.502385]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.503415] INFO: task ll_mgs_0001:7835 blocked for more than 120 seconds.
[19320.504564] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
[19320.505860] ll_mgs_0001     D ffff9ed294508000     0  7835      2 0x00000080
[19320.507100] Call Trace:
[19320.507537]  [&amp;lt;ffffffffb3718f39&amp;gt;] schedule+0x29/0x70
[19320.508381]  [&amp;lt;ffffffffc047d205&amp;gt;] cv_wait_common+0x125/0x150 [spl]
[19320.509429]  [&amp;lt;ffffffffb30bef10&amp;gt;] ? wake_up_atomic_t+0x30/0x30
[19320.510423]  [&amp;lt;ffffffffc047d245&amp;gt;] __cv_wait+0x15/0x20 [spl]
[19320.511401]  [&amp;lt;ffffffffc0640bdf&amp;gt;] txg_wait_synced+0xef/0x140 [zfs]
[19320.512482]  [&amp;lt;ffffffffc124834e&amp;gt;] osd_trans_stop+0x4be/0x5a0 [osd_zfs]
[19320.513667]  [&amp;lt;ffffffffc12d3cdb&amp;gt;] mgs_ir_update+0x2eb/0xb70 [mgs]
[19320.514701]  [&amp;lt;ffffffffc12b3d57&amp;gt;] mgs_target_reg+0x787/0x1370 [mgs]
[19320.516108]  [&amp;lt;ffffffffc0ffd24f&amp;gt;] ? lustre_pack_reply_flags+0x6f/0x1e0 [ptlrpc]
[19320.517377]  [&amp;lt;ffffffffc0ffd3d1&amp;gt;] ? lustre_pack_reply+0x11/0x20 [ptlrpc]
[19320.518624]  [&amp;lt;ffffffffc106038a&amp;gt;] tgt_request_handle+0x92a/0x1370 [ptlrpc]
[19320.519804]  [&amp;lt;ffffffffc1008e4b&amp;gt;] ptlrpc_server_handle_request+0x23b/0xaa0 [ptlrpc]
[19320.521082]  [&amp;lt;ffffffffb30c7c4b&amp;gt;] ? __wake_up_common+0x5b/0x90
[19320.522095]  [&amp;lt;ffffffffc100c592&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
[19320.523182]  [&amp;lt;ffffffffc100bb00&amp;gt;] ? ptlrpc_register_service+0xe30/0xe30 [ptlrpc]
[19320.524423]  [&amp;lt;ffffffffb30bdf21&amp;gt;] kthread+0xd1/0xe0
[19320.525254]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.526276]  [&amp;lt;ffffffffb37255f7&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[19320.527373]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.528395] INFO: task ll_mgs_0002:7836 blocked for more than 120 seconds.
[19320.529543] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
[19320.530836] ll_mgs_0002     D ffff9ed276b3eeb0     0  7836      2 0x00000080
[19320.532123] Call Trace:
[19320.532554]  [&amp;lt;ffffffffb3719e59&amp;gt;] schedule_preempt_disabled+0x29/0x70
[19320.533622]  [&amp;lt;ffffffffb3717c17&amp;gt;] __mutex_lock_slowpath+0xc7/0x1d0
[19320.534664]  [&amp;lt;ffffffffb3716fff&amp;gt;] mutex_lock+0x1f/0x2f
[19320.535545]  [&amp;lt;ffffffffc12d4af9&amp;gt;] mgs_get_ir_logs+0x599/0x10c0 [mgs]
[19320.536647]  [&amp;lt;ffffffffc1035196&amp;gt;] ? null_alloc_rs+0x186/0x340 [ptlrpc]
[19320.537770]  [&amp;lt;ffffffffc0ffea20&amp;gt;] ? lustre_swab_mgs_target_info+0x70/0x70 [ptlrpc]
[19320.539030]  [&amp;lt;ffffffffc12af810&amp;gt;] mgs_config_read+0xc0/0x1c0 [mgs]
[19320.540130]  [&amp;lt;ffffffffc106038a&amp;gt;] tgt_request_handle+0x92a/0x1370 [ptlrpc]
[19320.541310]  [&amp;lt;ffffffffc1008e4b&amp;gt;] ptlrpc_server_handle_request+0x23b/0xaa0 [ptlrpc]
[19320.542589]  [&amp;lt;ffffffffb30c7c4b&amp;gt;] ? __wake_up_common+0x5b/0x90
[19320.543606]  [&amp;lt;ffffffffc100c592&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
[19320.544689]  [&amp;lt;ffffffffc100bb00&amp;gt;] ? ptlrpc_register_service+0xe30/0xe30 [ptlrpc]
[19320.545929]  [&amp;lt;ffffffffb30bdf21&amp;gt;] kthread+0xd1/0xe0
[19320.546754]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.547771]  [&amp;lt;ffffffffb37255f7&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[19320.548863]  [&amp;lt;ffffffffb30bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[19320.571934] blk_update_request: I/O error, dev vda, sector 1586528
[19320.573557] EXT4-fs warning (device vda1): ext4_end_bio:316: I/O error -5 writing to inode 394026 (offset 1753088 size 12288 starting block 198319)
[19320.575691] Buffer I/O error on device vda1, logical block 198060
[19320.576701] Buffer I/O error on device vda1, logical block 198061
[19320.577708] Buffer I/O error on device vda1, logical block 198062
[19320.578726] blk_update_request: I/O error, dev vda, sector 1658040
[19320.578734] blk_update_request: I/O error, dev vda, sector 49363312
[19320.580787] EXT4-fs warning (device vda1): ext4_end_bio:316: I/O error -5 writing to inode 394025 (offset 9007104 size 73728 starting block 207273)
[19320.582923] Buffer I/O error on device vda1, logical block 206999
[19320.583959] Buffer I/O error on device vda1, logical block 207000
[19320.584973] Buffer I/O error on device vda1, logical block 207001
[19320.585982] Buffer I/O error on device vda1, logical block 207002
[19320.586984] Buffer I/O error on device vda1, logical block 207003
[19320.588012] Buffer I/O error on device vda1, logical block 207004
[19320.589028] Buffer I/O error on device vda1, logical block 207005
[19320.590074] blk_update_request: I/O error, dev vda, sector 1586520
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
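&lt;p&gt;Note that the buffer I/O errors above are on the guest&#8217;s root disk (vda1), not on a Lustre target, which points at the storage underneath the VM. A rough sketch of commands (device names are assumptions) for separating guest-visible symptoms from host-side disk trouble:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# on the guest: count block-layer errors against vda
dmesg | grep -c &apos;blk_update_request: I/O error, dev vda&apos;

# on the trevis-37 hypervisor: look for errors on the backing device
journalctl -k | grep -iE &apos;i/o error|blk_update_request&apos;
smartctl -H /dev/sda   # backing device name is a guess
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;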

&lt;p&gt;For this run, we are using ZFS 0.7.9-1 and RHEL 7.5.&lt;/p&gt;

&lt;p&gt;So far, this has only been seen once. This looks like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9845&quot; title=&quot;ost-pools test_22 hangs with &#8216;WARNING: Pool &amp;#39;lustre-mdt1&amp;#39; has encountered an uncorrectable I/O failure and has been suspended.&#8217;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9845&quot;&gt;&lt;del&gt;LU-9845&lt;/del&gt;&lt;/a&gt;. We haven&#8217;t seen &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9845&quot; title=&quot;ost-pools test_22 hangs with &#8216;WARNING: Pool &amp;#39;lustre-mdt1&amp;#39; has encountered an uncorrectable I/O failure and has been suspended.&#8217;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9845&quot;&gt;&lt;del&gt;LU-9845&lt;/del&gt;&lt;/a&gt; in a while so I&#8217;ve opened a new ticket to track this issue. &lt;/p&gt;</description>
                <environment>ZFS 0.7.9-1 and RHEL 7.5</environment>
        <key id="54107">LU-11704</key>
            <summary>sanity-lfsck test 2a hangs with &#8216;Pool &apos;lustre-mdt1&apos; has encountered an uncorrectable I/O failure and has been suspended.&#8217;</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="6">Not a Bug</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="jamesanunez">James Nunez</reporter>
                        <labels>
                    </labels>
                <created>Mon, 26 Nov 2018 23:08:21 +0000</created>
                <updated>Tue, 27 Nov 2018 00:11:58 +0000</updated>
                            <resolved>Tue, 27 Nov 2018 00:11:58 +0000</resolved>
                                    <version>Lustre 2.10.6</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="237484" author="adilger" created="Mon, 26 Nov 2018 23:25:27 +0000"  >&lt;p&gt;If there are also problems with the MDS ext4 writes, this looks like some kind of issue with the VM host, and not really a Lustre problem.  In particular, &lt;tt&gt;Buffer I/O error on device vda1, logical block 198060&lt;/tt&gt; means that there is a problem below the level of the VM guest that is beyond our control.&lt;/p&gt;

&lt;p&gt;Maybe check the &lt;tt&gt;trevis-37&lt;/tt&gt; host logs to see if the disk is having problems.  I think Maloo allows searching for results by a particular host/guest like &lt;tt&gt;trevis-37&amp;#42;&lt;/tt&gt; or similar, so it may be possible to see if that node is having more problems.&lt;/p&gt;</comment>
                            <comment id="237488" author="jamesanunez" created="Tue, 27 Nov 2018 00:10:40 +0000"  >&lt;p&gt;Good comment. Yes, there are at least two other instances of trevis-37 hanging in similar ways; &lt;a href=&quot;https://testing.whamcloud.com/test_sets/9ea67c96-ee56-11e8-86c0-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/9ea67c96-ee56-11e8-86c0-52540065bddc&lt;/a&gt; and &lt;a href=&quot;https://testing.whamcloud.com/test_sets/008db6d8-ee5f-11e8-bfe1-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/008db6d8-ee5f-11e8-bfe1-52540065bddc&lt;/a&gt; . &lt;/p&gt;

&lt;p&gt;The failures on this node were recurring, but none have happened for the past four days.&lt;/p&gt;

&lt;p&gt;I&apos;ll close this ticket and will open a DCO ticket if trevis-37 continues to have issues.&lt;/p&gt;</comment>
                            <comment id="237489" author="jamesanunez" created="Tue, 27 Nov 2018 00:11:58 +0000"  >&lt;p&gt;Not a Lustre bug.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                    <issuelinktype id="10011">
                        <name>Related</name>
                        <outwardlinks description="is related to ">
                            <issuelink>
                                <issuekey id="47700">LU-9845</issuekey>
                            </issuelink>
                        </outwardlinks>
                    </issuelinktype>
                </issuelinks>
                <attachments>
                </attachments>
                <subtasks>
                </subtasks>
                <customfields>
                    <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i006vr:</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>