<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:21:32 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-15815] fast_read/stale data/reclaim workround causes SIGBUS</title>
                <link>https://jira.whamcloud.com/browse/LU-15815</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;The fast_read stale data workaround from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt; can cause applications to receive a spurious SIGBUS when reclaim runs concurrently with page fault handler for mmaped files.&lt;/p&gt;</description>
                <environment></environment>
        <key id="70132">LU-15815</key>
            <summary>fast_read/stale data/reclaim workround causes SIGBUS</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="panda">Andrew Perepechko</assignee>
                                    <reporter username="jhammond">John Hammond</reporter>
                        <labels>
                    </labels>
                <created>Tue, 3 May 2022 19:11:08 +0000</created>
                <updated>Fri, 19 Jan 2024 22:19:45 +0000</updated>
                            <resolved>Mon, 9 May 2022 02:09:16 +0000</resolved>
                                                    <fixVersion>Lustre 2.15.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="333703" author="gerrit" created="Tue, 3 May 2022 19:48:21 +0000"  >&lt;p&gt;&quot;John L. Hammond &amp;lt;jhammond@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/47204&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/47204&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15815&quot; title=&quot;fast_read/stale data/reclaim workround causes SIGBUS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15815&quot;&gt;&lt;del&gt;LU-15815&lt;/del&gt;&lt;/a&gt; llite: disable fast_read and workaround&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: d1720e2b774bb4137324fb9e80c3d6151e3b9c0f&lt;/p&gt;</comment>
                            <comment id="333710" author="jhammond" created="Tue, 3 May 2022 20:50:06 +0000"  >&lt;p&gt;Here is a reliable if inconvenient reproducer:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;$LUSTRE/tests/llmount.sh
lctl set_param debug_mb=512 debug=&apos;+trace page mmap&apos;
lctl set_param llite.*.max_read_ahead_mb=0 # Not needed to reproduce.

yum install openmpi openmpi-devel
mv /usr/lib64/openmpi /mnt/lustre/openmpi
ln -s /mnt/lustre/openmpi /usr/lib64/openmpi
cd /mnt/lustre
wget http://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-5.9.tar.gz
tar -xzf osu-micro-benchmarks-5.9.tar.gz
cd osu-micro-benchmarks-5.9
./configure CC=/usr/lib64/openmpi/bin/mpicc CXX=/usr/lib64/openmpi/bin/mpicxx &amp;amp;&amp;amp; make -j4

while true; do echo 3 &amp;gt; /proc/sys/vm/drop_caches ; done &amp;amp;
lctl clear
while /mnt/lustre/openmpi/bin/mpirun --allow-run-as-root -np 2 --oversubscribe --host k /mnt/lustre/osu-micro-benchmarks-5.9//mpi/\
collective/osu_alltoall -f -m 65536; do
  true
done
lctl dk &amp;gt; /tmp/osu_alltoall.dk
kill %1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;...
[k:18404] *** Process received signal ***
[k:18404] Signal: Bus error (7)
[k:18404] Signal code: Non-existant physical address (2)
[k:18404] Failing at address: 0x7fdb20a62e73
[k:18404] [ 0] /lib64/libpthread.so.0(+0xf5f0)[0x7fdb1fcf45f0]
[k:18404] [ 1] /lib64/ld-linux-x86-64.so.2(+0x19d72)[0x7fdb20f88d72]
[k:18404] [ 2] /lib64/ld-linux-x86-64.so.2(+0x8ae2)[0x7fdb20f77ae2]
[k:18404] [ 3] /lib64/ld-linux-x86-64.so.2(+0x14254)[0x7fdb20f83254]
[k:18404] [ 4] /lib64/ld-linux-x86-64.so.2(+0xf784)[0x7fdb20f7e784]
[k:18404] [ 5] /lib64/ld-linux-x86-64.so.2(+0x13b3b)[0x7fdb20f82b3b]
[k:18404] [ 6] /lib64/libdl.so.2(+0xeeb)[0x7fdb2084beeb]
[k:18404] [ 7] /lib64/ld-linux-x86-64.so.2(+0xf784)[0x7fdb20f7e784]
[k:18404] [ 8] /lib64/libdl.so.2(+0x14ed)[0x7fdb2084c4ed]
[k:18404] [ 9] /lib64/libdl.so.2(dlopen+0x31)[0x7fdb2084bf81]
[k:18404] [10] /usr/lib64/openmpi/lib/libopen-pal.so.13(+0x59edd)[0x7fdb20aa8edd]
[k:18404] [11] /usr/lib64/openmpi/lib/libopen-pal.so.13(+0x3c7d1)[0x7fdb20a8b7d1]
[k:18404] [12] /usr/lib64/openmpi/lib/libopen-pal.so.13(mca_base_component_find+0x78a)[0x7fdb20a8cd4a]
[k:18404] [13] /usr/lib64/openmpi/lib/libopen-pal.so.13(mca_base_framework_components_register+0x56)[0x7fdb20a96cb6]
[k:18404] [14] /usr/lib64/openmpi/lib/libopen-pal.so.13(mca_base_framework_register+0x196)[0x7fdb20a97166]
[k:18404] [15] /usr/lib64/openmpi/lib/libopen-pal.so.13(mca_base_framework_open+0x12)[0x7fdb20a971c2]
[k:18404] [16] /usr/lib64/openmpi/lib/openmpi/mca_ess_hnp.so(+0x4a48)[0x7fdb1eadea48]
[k:18404] [17] /usr/lib64/openmpi/lib/libopen-rte.so.12(orte_init+0x168)[0x7fdb20d09398]
[k:18404] [18] /mnt/lustre/openmpi/bin/mpirun[0x40449f]
[k:18404] [19] /mnt/lustre/openmpi/bin/mpirun[0x40361d]
[k:18404] [20] /lib64/libc.so.6(__libc_start_main+0xf5)[0x7fdb1f939505]
[k:18404] [21] /mnt/lustre/openmpi/bin/mpirun[0x403539]
[k:18404] *** End of error message ***
Bus error (core dumped)
# grep SIGBUS /tmp/osu_alltoall.dk
00000080:00008000:3.0:1651610586.302378:0:18404:0:(vvp_io.c:1353:vvp_io_kernel_fault()) got addr 00007fdb20a62000 - SIGBUS
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Trimmed logs are attached as osu_alltoall_trimmed.dk. PID 18404 is osu_alltoall, 16737 is bash writing to drop_caches.&lt;/p&gt;</comment>
                            <comment id="333941" author="gerrit" created="Thu, 5 May 2022 18:48:53 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/47204/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/47204/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15815&quot; title=&quot;fast_read/stale data/reclaim workround causes SIGBUS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15815&quot;&gt;&lt;del&gt;LU-15815&lt;/del&gt;&lt;/a&gt; llite: disable fast_read and workaround&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 201ade9442828fbb3bedb3b31154d51ead10af41&lt;/p&gt;</comment>
                            <comment id="333950" author="pjones" created="Thu, 5 May 2022 19:12:29 +0000"  >&lt;p&gt;Landed for 2.15&lt;/p&gt;</comment>
                            <comment id="334017" author="spitzcor" created="Fri, 6 May 2022 14:16:21 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=jhammond&quot; class=&quot;user-hover&quot; rel=&quot;jhammond&quot;&gt;jhammond&lt;/a&gt;, can we re-open this ticket?  It makes no sense to me to revert &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt; when you&apos;ve confirmed will reintroduce a data corruption.  Can we find another solution please?&lt;/p&gt;</comment>
                            <comment id="334043" author="pjones" created="Fri, 6 May 2022 17:08:08 +0000"  >&lt;p&gt;Reopening until this discussion is settled&lt;/p&gt;</comment>
                            <comment id="334049" author="jhammond" created="Fri, 6 May 2022 17:25:08 +0000"  >&lt;p&gt;&amp;gt;  Can we find another solution please?&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=panda&quot; class=&quot;user-hover&quot; rel=&quot;panda&quot;&gt;panda&lt;/a&gt; shared some ideas on &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15819&quot; title=&quot;Executables run from Lustre may receive spurious SIGBUS signals&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15819&quot;&gt;&lt;del&gt;LU-15819&lt;/del&gt;&lt;/a&gt;. Is that work in progress?&lt;/p&gt;</comment>
                            <comment id="334051" author="panda" created="Fri, 6 May 2022 17:39:08 +0000"  >&lt;blockquote&gt;&lt;p&gt;Is that work in progress?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Yes, it is.&lt;/p&gt;</comment>
                            <comment id="334058" author="spiechurski" created="Fri, 6 May 2022 18:28:49 +0000"  >&lt;p&gt;We tested this patch on a 2.12.6 basis, and disabling fast_read is actually catastrophic in terms of performance on our customer code.&lt;/p&gt;

&lt;p&gt;The revert of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt; had already been done several months ago (due to the SIGBUS errors), but we recently found that it was causing the corruptions on mmap&apos;ed pages. Disabling fast_read on top of it is causing a x5 slowdown on the application.&lt;/p&gt;

&lt;p&gt;So, yes please can we find another solution ?&lt;/p&gt;</comment>
                            <comment id="334079" author="jhammond" created="Fri, 6 May 2022 21:43:06 +0000"  >&lt;p&gt;&amp;gt; The revert of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt; had already been done several months ago (due to the SIGBUS errors),&lt;/p&gt;

&lt;p&gt;Where? On what branch?&lt;/p&gt;

&lt;p&gt;&amp;gt; but we recently found that it was causing the corruptions on mmap&apos;ed pages.&lt;/p&gt;

&lt;p&gt;Do you mean that &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt; causes corruption? Or that reverting it causes corruption? Did you open an issue?&lt;/p&gt;

&lt;p&gt;&amp;gt; So, yes please can we find another solution ?&lt;/p&gt;

&lt;p&gt;There seems to be a lot of hopeful use of &quot;we&quot; in this ticket.&lt;/p&gt;</comment>
                            <comment id="334081" author="jhammond" created="Fri, 6 May 2022 21:56:00 +0000"  >&lt;p&gt;With &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt; in place we have the following:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[  179.609120] mmap_fault_vs_d (11006): drop_caches: 3
[  179.613492] mmap_fault_vs_d (11006): drop_caches: 3
[  179.618134] mmap_fault_vs_d (11006): drop_caches: 3
[  179.621452] LustreError: 11007:0:(osc_cache.c:2495:osc_teardown_async_page()) extent 000000009717064a@{[3 -&amp;gt; 3/1023], [2|0|-|cache|wi|00000000b2cc94b9], [28672|1|+|-|00000000d41a65d9|1024|00000000baa546d7]} trunc at 3.
[  179.622752] LustreError: 11007:0:(osc_cache.c:2495:osc_teardown_async_page()) ### extent: 000000009717064a ns: lustre-OST0000-osc-ffff8ba637eb9000 lock: 00000000d41a65d9/0xa4961a626f53297d lrc: 4/0,1 mode: PW/PW res: [0x19:0x0:0x0].0x0 rrc: 2 type: EXT [0-&amp;gt;18446744073709551615] (req 12288-&amp;gt;16383) gid 0 flags: 0x800020000000000 nid: local remote: 0xa4961a626f532984 expref: -99 pid: 11007 timeout: 0 lvb_type: 1
[  179.622988] mmap_fault_vs_d (11006): drop_caches: 3
[  179.625126] LustreError: 11007:0:(osc_page.c:182:osc_page_delete()) page@00000000285d5729[3 00000000214d642c 5 1 00000000baa546d7]
[  179.625126] 
[  179.626325] LustreError: 11007:0:(osc_page.c:182:osc_page_delete()) vvp-page@000000002564fdc0(1:0) vm@00000000c9fa97b2 17ffffc0001035 2:0 ffff8ba5f1774140 3 lru
[  179.626325] 
[  179.627359] LustreError: 11007:0:(osc_page.c:182:osc_page_delete()) lov-page@00000000b1b9d4b5
[  179.627359] 
[  179.628033] LustreError: 11007:0:(osc_page.c:182:osc_page_delete()) osc-page@00000000a3522c1f 3: 1&amp;lt; 0x845fed 2 + - &amp;gt; 2&amp;lt; 12288 0 4096 0x0 0x40420 | 00000000baa546d7 00000000cd5d95e4 00000000b2cc94b9 &amp;gt; 3&amp;lt; 0 0 0 &amp;gt; 4&amp;lt; 0 0 8 667238400 - | - - + - &amp;gt; 5&amp;lt; - - + - | 0 - | 1 - -&amp;gt;
[  179.628033] 
[  179.629656] LustreError: 11007:0:(osc_page.c:182:osc_page_delete()) end page@00000000285d5729
[  179.629656] 
[  179.629826] mmap_fault_vs_d (11006): drop_caches: 3
[  179.630301] LustreError: 11007:0:(osc_page.c:182:osc_page_delete()) Trying to teardown failed: -16
[  179.630302] LustreError: 11007:0:(osc_page.c:183:osc_page_delete()) ASSERTION( 0 ) failed: 
[  179.630304] LustreError: 11007:0:(osc_page.c:183:osc_page_delete()) LBUG
[  179.632209] Pid: 11007, comm: mmap_fault_vs_d 4.18.0-348.7.1.el8.x86_64 #1 SMP Thu Mar 3 10:39:00 CST 2022
[  179.632823] Call Trace TBD:
[  179.633101] [&amp;lt;0&amp;gt;] libcfs_call_trace+0x8f/0xe0 [libcfs]
[  179.633455] [&amp;lt;0&amp;gt;] lbug_with_loc+0x53/0xb0 [libcfs]
[  179.633774] [&amp;lt;0&amp;gt;] osc_page_delete+0x666/0x670 [osc]
[  179.634124] [&amp;lt;0&amp;gt;] cl_page_delete0+0x9d/0x2d0 [obdclass]
[  179.634491] [&amp;lt;0&amp;gt;] cl_page_delete+0x3e/0x130 [obdclass]
[  179.634516] mmap_fault_vs_d (11006): drop_caches: 3
[  179.634854] [&amp;lt;0&amp;gt;] ll_invalidatepage+0xc0/0x230 [lustre]
[  179.634862] [&amp;lt;0&amp;gt;] truncate_cleanup_page+0x8d/0x170
[  179.635861] [&amp;lt;0&amp;gt;] generic_error_remove_page+0x31/0x80
[  179.636274] [&amp;lt;0&amp;gt;] vvp_page_discard+0x49/0x120 [lustre]
[  179.636640] [&amp;lt;0&amp;gt;] cl_page_discard+0x5e/0xc0 [obdclass]
[  179.636993] [&amp;lt;0&amp;gt;] cl_page_list_discard+0x59/0x160 [obdclass]
[  179.637394] [&amp;lt;0&amp;gt;] ll_io_read_page+0x27c/0xd00 [lustre]
[  179.637749] [&amp;lt;0&amp;gt;] ll_readpage+0x133/0xb70 [lustre]
[  179.638076] [&amp;lt;0&amp;gt;] filemap_fault+0x84c/0xa60
[  179.638369] [&amp;lt;0&amp;gt;] vvp_io_fault_start+0x53f/0x12b0 [lustre]
[  179.638753] [&amp;lt;0&amp;gt;] cl_io_start+0x66/0x190 [obdclass]
[  179.638928] mmap_fault_vs_d (11006): drop_caches: 3
[  179.639107] [&amp;lt;0&amp;gt;] cl_io_loop+0xde/0x2a0 [obdclass]
[  179.639755] [&amp;lt;0&amp;gt;] ll_fault+0x916/0xc60 [lustre]
[  179.640066] [&amp;lt;0&amp;gt;] __do_fault+0x48/0x130
[  179.640323] [&amp;lt;0&amp;gt;] do_fault+0x110/0x660
[  179.640572] [&amp;lt;0&amp;gt;] __handle_mm_fault+0xa0f/0xdb0
[  179.640873] [&amp;lt;0&amp;gt;] handle_mm_fault+0x11e/0x310
[  179.641168] [&amp;lt;0&amp;gt;] __do_page_fault+0x317/0x900
[  179.641461] [&amp;lt;0&amp;gt;] do_page_fault+0x87/0x300
[  179.641737] [&amp;lt;0&amp;gt;] page_fault+0x1e/0x30
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;See mmap_fault_vs_drop_caches3.c to reproduce.&lt;/p&gt;</comment>
                            <comment id="334092" author="spiechurski" created="Sat, 7 May 2022 09:39:50 +0000"  >&lt;p&gt;&amp;gt;&amp;gt; The revert of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt;&#160;had already been done several months ago (due to the SIGBUS errors),&lt;/p&gt;

&lt;p&gt;&amp;gt;Where? On what branch?&lt;/p&gt;

&lt;p&gt;The site is ECMWF, and it was done on a 2.12.6 basis. I unfortunately don&apos;t have the whole history as I was not involved in these at the time and this was handled between the local team and DDN directly.&lt;/p&gt;

&lt;p&gt;&amp;gt;&amp;gt; but we recently found that it was causing the corruptions on mmap&apos;ed pages.&lt;/p&gt;

&lt;p&gt;&amp;gt;Do you mean that &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt;&#160;causes corruption? Or that reverting it causes corruption? Did you open an issue?&lt;/p&gt;

&lt;p&gt;Reverting it caused the corruption, or at least we get the corruptions with it reverted. As we only understood recently that the crash/hang issues we observed were related to in-memory data corruption, it is actually only an assumption that the revert caused it.&lt;/p&gt;

&lt;p&gt;There is a DDN support case open for this.&lt;/p&gt;

&lt;p&gt;&amp;gt;&amp;gt; So, yes please can we find another solution ?&lt;/p&gt;

&lt;p&gt;&amp;gt;There seems to be a lot of hopeful use of &quot;we&quot; in this ticket.&lt;/p&gt;

&lt;p&gt;Obviously, this last &quot;we&quot; was probably more a &quot;you&quot;. As much as I would like to provide solutions, my understanding of the memory management subsystem is too limited for this.&#160;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Regards,&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Sebastien.&lt;/p&gt;</comment>
                            <comment id="334116" author="paf0186" created="Mon, 9 May 2022 02:08:51 +0000"  >&lt;p&gt;So, we landed a patch from John here to resolve the SIGBUS issue by removing the clearpageuptodate() call in vvp_page_delete, ie, reverting &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt;.&#160; I have more comments on the SIGBUS issue, etc, which I&apos;ll put there, but basically, I think this is correct - It&apos;s clear from the page fault code in the kernel that we can&apos;t unset pageuptodate() without causing problems.&#160; (I was wrong about this previously.)&lt;/p&gt;

&lt;p&gt;So, that means the SIGBUS issue is resolved, but we have to find another way to solve &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;So, I&apos;m going to close &lt;b&gt;this&lt;/b&gt; ticket as resolved, and let&apos;s move the discussion to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt;. &#160;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=panda&quot; class=&quot;user-hover&quot; rel=&quot;panda&quot;&gt;panda&lt;/a&gt;, I&apos;ve added you as a watcher there.&#160; I have a few thoughts on it but I&apos;m still hoping you and/or Shadow have a good idea...&lt;/p&gt;</comment>
                            <comment id="334117" author="paf0186" created="Mon, 9 May 2022 02:09:16 +0000"  >&lt;p&gt;Resolved by revert of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt;.&#160; See &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14541&quot; title=&quot;Memory reclaim caused a stale data read&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14541&quot;&gt;&lt;del&gt;LU-14541&lt;/del&gt;&lt;/a&gt; for further discussion.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="70140">LU-15819</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="39984">LU-8633</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="63444">LU-14541</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="56487">LU-12587</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="72362">LU-16160</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="43528" name="image_2022-05-03_13-25-27.png" size="5717" author="panda" created="Wed, 4 May 2022 18:16:01 +0000"/>
                            <attachment id="43561" name="mmap_fault_vs_drop_caches3.c" size="2396" author="jhammond" created="Fri, 6 May 2022 21:56:15 +0000"/>
                            <attachment id="43491" name="osu_alltoall_trimmed.dk" size="86033" author="jhammond" created="Tue, 3 May 2022 20:50:26 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i02ox3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>