<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:17:46 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8464] Lustre I/O hung waiting for page</title>
                <link>https://jira.whamcloud.com/browse/LU-8464</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;&amp;gt; PID: 63193  TASK: ffff880902f7e040  CPU: 35  COMMAND: &quot;python&quot;&lt;br/&gt;
&amp;gt;  #0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1108&amp;#93;&lt;/span&gt; schedule at ffffffff8141c637&lt;br/&gt;
&amp;gt;  #1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1270&amp;#93;&lt;/span&gt; io_schedule at ffffffff8141cd01&lt;br/&gt;
&amp;gt;  #2 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea12a0&amp;#93;&lt;/span&gt; sleep_on_page at ffffffff8110111e&lt;br/&gt;
&amp;gt;  #3 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea12b0&amp;#93;&lt;/span&gt; __wait_on_bit at ffffffff8141d4b2&lt;br/&gt;
&amp;gt;  #4 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea12f0&amp;#93;&lt;/span&gt; wait_on_page_bit at ffffffff81101454&lt;br/&gt;
&amp;gt;  #5 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1350&amp;#93;&lt;/span&gt; shrink_inactive_list at ffffffff81113c69&lt;br/&gt;
&amp;gt;  #6 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1530&amp;#93;&lt;/span&gt; shrink_list at ffffffff8111445e&lt;br/&gt;
&amp;gt;  #7 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1560&amp;#93;&lt;/span&gt; shrink_zone at ffffffff8111496a&lt;br/&gt;
&amp;gt;  #8 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea16b0&amp;#93;&lt;/span&gt; do_try_to_free_pages at ffffffff81114d8b&lt;br/&gt;
&amp;gt;  #9 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1750&amp;#93;&lt;/span&gt; try_to_free_mem_cgroup_pages at ffffffff811153dd&lt;br/&gt;
&amp;gt; #10 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea17f0&amp;#93;&lt;/span&gt; mem_cgroup_hierarchical_reclaim at ffffffff81151d6d&lt;br/&gt;
&amp;gt; #11 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea18a0&amp;#93;&lt;/span&gt; __mem_cgroup_try_charge at ffffffff811539da&lt;br/&gt;
&amp;gt; #12 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1990&amp;#93;&lt;/span&gt; mem_cgroup_cache_charge at ffffffff811557f4&lt;br/&gt;
&amp;gt; #13 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea19c0&amp;#93;&lt;/span&gt; add_to_page_cache_locked at ffffffff8110167e&lt;br/&gt;
&amp;gt; #14 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1a00&amp;#93;&lt;/span&gt; add_to_page_cache at ffffffff811017cb&lt;br/&gt;
&amp;gt; #15 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1a30&amp;#93;&lt;/span&gt; add_to_page_cache_lru at ffffffff8110182e&lt;br/&gt;
&amp;gt; #16 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1a50&amp;#93;&lt;/span&gt; grab_cache_page_nowait at ffffffff81101f5b&lt;br/&gt;
&amp;gt; #17 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1a80&amp;#93;&lt;/span&gt; ll_write_begin at ffffffffa086163b &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #18 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1b10&amp;#93;&lt;/span&gt; generic_file_buffered_write at ffffffff811003ce&lt;br/&gt;
&amp;gt; #19 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1bd0&amp;#93;&lt;/span&gt; __generic_file_aio_write at ffffffff81103179&lt;br/&gt;
&amp;gt; #20 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1c80&amp;#93;&lt;/span&gt; generic_file_aio_write at ffffffff811033c9&lt;br/&gt;
&amp;gt; #21 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1cc0&amp;#93;&lt;/span&gt; vvp_io_write_start at ffffffffa087493f &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #22 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1d10&amp;#93;&lt;/span&gt; cl_io_start at ffffffffa037f502 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #23 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1d40&amp;#93;&lt;/span&gt; cl_io_loop at ffffffffa0383084 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #24 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1d70&amp;#93;&lt;/span&gt; ll_file_io_generic at ffffffffa0816237 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #25 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1e40&amp;#93;&lt;/span&gt; ll_file_aio_write at ffffffffa0826409 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #26 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1ea0&amp;#93;&lt;/span&gt; ll_file_write at ffffffffa08269ed &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #27 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1f10&amp;#93;&lt;/span&gt; vfs_write at ffffffff8115bd9b&lt;br/&gt;
&amp;gt; #28 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1f40&amp;#93;&lt;/span&gt; sys_write at ffffffff8115bf45&lt;br/&gt;
&amp;gt; #29 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880b44ea1f80&amp;#93;&lt;/span&gt; system_call_fastpath at ffffffff81426aab&lt;br/&gt;
&amp;gt;     RIP: 00002aaaabaa7f40  RSP: 00007ffffffefa10  RFLAGS: 00000282&lt;br/&gt;
&amp;gt;     RAX: 0000000000000001  RBX: ffffffff81426aab  RCX: 00000000000000a4&lt;br/&gt;
&amp;gt;     RDX: 0000000000400000  RSI: 00002aabd2b6e000  RDI: 0000000000000004&lt;br/&gt;
&amp;gt;     RBP: 00002aabd2b6e000   R8: 0000000000000000   R9: 0000000000000000&lt;br/&gt;
&amp;gt;     R10: 000000007a1e3f60  R11: 0000000000000246  R12: 0000000000000000&lt;br/&gt;
&amp;gt;     R13: 0000000000400000  R14: 000000007a1e3e80  R15: 0000000000400000&lt;br/&gt;
&amp;gt;     ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b&lt;/p&gt;

&lt;p&gt;&amp;gt; PID: 65447  TASK: ffff88032d84e040  CPU: 15  COMMAND: &quot;slurmstepd&quot;&lt;br/&gt;
&amp;gt;  #0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1388&amp;#93;&lt;/span&gt; schedule at ffffffff8141c637&lt;br/&gt;
&amp;gt;  #1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b14f0&amp;#93;&lt;/span&gt; io_schedule at ffffffff8141cd01&lt;br/&gt;
&amp;gt;  #2 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1520&amp;#93;&lt;/span&gt; sleep_on_page at ffffffff8110111e&lt;br/&gt;
&amp;gt;  #3 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1530&amp;#93;&lt;/span&gt; __wait_on_bit at ffffffff8141d4b2&lt;br/&gt;
&amp;gt;  #4 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1570&amp;#93;&lt;/span&gt; wait_on_page_bit at ffffffff81101454&lt;br/&gt;
&amp;gt;  #5 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b15d0&amp;#93;&lt;/span&gt; shrink_inactive_list at ffffffff81113c69&lt;br/&gt;
&amp;gt;  #6 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b17b0&amp;#93;&lt;/span&gt; shrink_list at ffffffff8111445e&lt;br/&gt;
&amp;gt;  #7 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b17e0&amp;#93;&lt;/span&gt; shrink_zone at ffffffff8111496a&lt;br/&gt;
&amp;gt;  #8 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1930&amp;#93;&lt;/span&gt; do_try_to_free_pages at ffffffff81114d8b&lt;br/&gt;
&amp;gt;  #9 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b19d0&amp;#93;&lt;/span&gt; try_to_free_mem_cgroup_pages at ffffffff811153dd&lt;br/&gt;
&amp;gt; #10 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1a70&amp;#93;&lt;/span&gt; mem_cgroup_hierarchical_reclaim at ffffffff81151d6d&lt;br/&gt;
&amp;gt; #11 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1b20&amp;#93;&lt;/span&gt; __mem_cgroup_try_charge at ffffffff811539da&lt;br/&gt;
&amp;gt; #12 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1c10&amp;#93;&lt;/span&gt; mem_cgroup_prepare_migration at ffffffff81155b92&lt;br/&gt;
&amp;gt; #13 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1c50&amp;#93;&lt;/span&gt; migrate_pages at ffffffff8114e0d1&lt;br/&gt;
&amp;gt; #14 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1ce0&amp;#93;&lt;/span&gt; compact_zone at ffffffff81146aac&lt;br/&gt;
&amp;gt; #15 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1de0&amp;#93;&lt;/span&gt; __compact_pgdat at ffffffff8114725b&lt;br/&gt;
&amp;gt; #16 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1e20&amp;#93;&lt;/span&gt; compact_node at ffffffff811472df&lt;br/&gt;
&amp;gt; #17 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1e90&amp;#93;&lt;/span&gt; sysfs_compact_node at ffffffff81147341&lt;br/&gt;
&amp;gt; #18 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1eb0&amp;#93;&lt;/span&gt; sysdev_store at ffffffff812b7bf0&lt;br/&gt;
&amp;gt; #19 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1ec0&amp;#93;&lt;/span&gt; sysfs_write_file at ffffffff811c4d27&lt;br/&gt;
&amp;gt; #20 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1f10&amp;#93;&lt;/span&gt; vfs_write at ffffffff8115bd9b&lt;br/&gt;
&amp;gt; #21 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1f40&amp;#93;&lt;/span&gt; sys_write at ffffffff8115bf45&lt;br/&gt;
&amp;gt; #22 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880f939b1f80&amp;#93;&lt;/span&gt; system_call_fastpath at ffffffff81426aab&lt;/p&gt;</description>
                <environment></environment>
        <key id="38562">LU-8464</key>
            <summary>Lustre I/O hung waiting for page</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="askulysh">Andriy Skulysh</assignee>
                                    <reporter username="askulysh">Andriy Skulysh</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Tue, 2 Aug 2016 08:23:48 +0000</created>
                <updated>Fri, 28 Oct 2022 01:11:33 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="160503" author="askulysh" created="Tue, 2 Aug 2016 08:25:53 +0000"  >&lt;p&gt;&amp;gt; PID: 14502  TASK: ffff881fedf78040  CPU: 13  COMMAND: &quot;ptlrpcd_11&quot;&lt;br/&gt;
&amp;gt;  #0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223660&amp;#93;&lt;/span&gt; schedule at ffffffff8141c637&lt;br/&gt;
&amp;gt;  #1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee2237c8&amp;#93;&lt;/span&gt; io_schedule at ffffffff8141cd01&lt;br/&gt;
&amp;gt;  #2 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee2237f8&amp;#93;&lt;/span&gt; sleep_on_page at ffffffff8110111e&lt;br/&gt;
&amp;gt;  #3 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223808&amp;#93;&lt;/span&gt; __wait_on_bit_lock at ffffffff8141d28a&lt;br/&gt;
&amp;gt;  #4 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223848&amp;#93;&lt;/span&gt; __lock_page at ffffffff81101109&lt;br/&gt;
&amp;gt;  #5 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee2238a8&amp;#93;&lt;/span&gt; vvp_page_make_ready at ffffffffa08716ed &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;  #6 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee2238d8&amp;#93;&lt;/span&gt; cl_page_make_ready at ffffffffa03751c5 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;  #7 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223928&amp;#93;&lt;/span&gt; osc_extent_make_ready at ffffffffa070f9ac &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;  #8 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223a68&amp;#93;&lt;/span&gt; osc_io_unplug0 at ffffffffa0713e5e &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;  #9 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223c98&amp;#93;&lt;/span&gt; osc_io_unplug at ffffffffa07156c1 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #10 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223ca8&amp;#93;&lt;/span&gt; brw_queue_work at ffffffffa06e6426 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #11 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223cc8&amp;#93;&lt;/span&gt; work_interpreter at ffffffffa04955ae &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #12 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223ce8&amp;#93;&lt;/span&gt; ptlrpc_check_set at ffffffffa049e85c &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #13 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223d78&amp;#93;&lt;/span&gt; ptlrpcd_check at ffffffffa04caaab &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #14 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223dd8&amp;#93;&lt;/span&gt; ptlrpcd at ffffffffa04cb15b &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #15 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223ee8&amp;#93;&lt;/span&gt; kthread at ffffffff8107374e&lt;br/&gt;
&amp;gt; #16 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff881fee223f48&amp;#93;&lt;/span&gt; kernel_thread_helper at ffffffff81427bb4&lt;/p&gt;</comment>
                            <comment id="160504" author="askulysh" created="Tue, 2 Aug 2016 08:30:11 +0000"  >&lt;p&gt;Thread PID: 65447 tries to migrate 2nd page from extent&lt;br/&gt;
and waits for PID 14502  to complete writeback.&lt;/p&gt;

&lt;p&gt;But these 2 pages are going to fit in one RPC. So PID 14502 can&apos;t complete IO because the 1st page was locked by  pid 65447.&lt;/p&gt;</comment>
                            <comment id="160634" author="gerrit" created="Wed, 3 Aug 2016 01:38:52 +0000"  >&lt;p&gt;Andriy Skulysh (andriy.skulysh@seagate.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/21652&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/21652&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8464&quot; title=&quot;Lustre I/O hung waiting for page&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8464&quot;&gt;LU-8464&lt;/a&gt; llite: Lustre I/O hung waiting for page&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 379e1b2fe3d5fe98972e72887eed60801fbc6828&lt;/p&gt;</comment>
                            <comment id="160967" author="jay" created="Fri, 5 Aug 2016 18:55:23 +0000"  >&lt;p&gt;does migrate_pages() lock one page and then wait another page to complete write back?&lt;/p&gt;</comment>
                            <comment id="162150" author="jay" created="Wed, 17 Aug 2016 02:10:04 +0000"  >&lt;p&gt;I spent some time on this issue and found something new (which you didn&apos;t mention on the ticket).&lt;/p&gt;

&lt;p&gt;I think this issue is due to an implementation of memory cgroup. As you can see from the code __unmap_and_move():&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;                lock_page(page);
        }

        &lt;span class=&quot;code-comment&quot;&gt;/* charge against &lt;span class=&quot;code-keyword&quot;&gt;new&lt;/span&gt; page */&lt;/span&gt;
        mem_cgroup_prepare_migration(page, newpage, &amp;amp;mem);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;it locks a page and charges mem cgroup, which in turns try to free a page from the cgroup. In the process of freeing page, it waits for the page write back to complete. This causes deadlock.&lt;/p&gt;

&lt;p&gt;Let me put things together. &lt;/p&gt;

&lt;p&gt;Ptlrpc thread:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;lock page A;
set writeback to page A;
unlock page A;
lock page B     &amp;lt;- blocked
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;and migrating thread:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;/* try to migrate page B */
lock page B;
/* since there is no free slot of this process&apos; memory control group */
try to free page A;
wait for A&apos;s writeback to complete;  &amp;lt;- blocked
free page A;
wait for B&apos;s writeback to complete;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It&apos;s a really bad choice for migrate_pages() to lock a page and wait for writeback on another one to complete.&lt;/p&gt;

&lt;p&gt;This problem is hard to fix in Lustre but way easier to get fixed in kernel, actually it turns out that linux-4.x kernels don&apos;t have this problem any more.&lt;br/&gt;
We can just move &apos;wait for B&apos;s writeback to complete&apos; to the location before trying to free page A and this problem should be fixed.&lt;/p&gt;

&lt;p&gt;I will take a further look to see since which kernel this problem has been fixed.&lt;/p&gt;</comment>
                            <comment id="162405" author="amk" created="Thu, 18 Aug 2016 17:46:51 +0000"  >&lt;p&gt;Thanks Jinshan. I&apos;ll pass this bug on to our kernel engineers. If you can identify the kernel where it&apos;s fixed, I&apos;m sure that would be a big help.&lt;/p&gt;</comment>
                            <comment id="162414" author="jay" created="Thu, 18 Aug 2016 18:26:40 +0000"  >&lt;p&gt;it seems like the fix appears since 3.18 kernels.&lt;/p&gt;</comment>
                            <comment id="162446" author="amk" created="Thu, 18 Aug 2016 20:56:13 +0000"  >&lt;p&gt;The upstream commit that removed the mem_cgroup_prepare_migration()&lt;br/&gt;
from __unmap_and_move() is&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=0a31bc97c80c3fa87b32c091d9a930ac19cd0c40&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=0a31bc97c80c3fa87b32c091d9a930ac19cd0c40&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="180113" author="green" created="Mon, 9 Jan 2017 18:43:14 +0000"  >&lt;p&gt;I opened a redhat bugzilla ticket about this to backport the patch into some next rhel7.x kernel. (you probably cannot see it since by default all such tickets are private):&lt;br/&gt;
&lt;a href=&quot;https://bugzilla.redhat.com/show_bug.cgi?id=1410571&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://bugzilla.redhat.com/show_bug.cgi?id=1410571&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="351050" author="adilger" created="Fri, 28 Oct 2022 01:11:33 +0000"  >&lt;p&gt;Oleg, was this patch ever landed in newer el7 releases?  I&apos;m wondering if this should be closed as &quot;Won&apos;t Fix&quot; since the patch is not really needed anymore, AFAICS.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="21692">LU-4171</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzyj93:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>