<?xml version="1.0" encoding="UTF-8"?>
<!--
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:46:36 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-4873] Lustre client hangs in vvp_page_make_ready</title>
                <link>https://jira.whamcloud.com/browse/LU-4873</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Application hangs in vvp_page_make_ready and dumps call traces.&lt;br/&gt;
We applied patch of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4540&quot; title=&quot;Test failure sanity-quota test_8: dbench hung in vvp_page_assume&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4540&quot;&gt;&lt;del&gt;LU-4540&lt;/del&gt;&lt;/a&gt;, but still hit this issue.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Apr  8 01:27:04 r21 kernel: Call Trace:
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81055f96&amp;gt;] ? enqueue_task+0x66/0x80
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff810a2431&amp;gt;] ? ktime_get_ts+0xb1/0xf0
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81119e10&amp;gt;] ? sync_page+0x0/0x50
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff8150e953&amp;gt;] io_schedule+0x73/0xc0
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81119e4d&amp;gt;] sync_page+0x3d/0x50
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff8150f1ba&amp;gt;] __wait_on_bit_lock+0x5a/0xc0
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81119de7&amp;gt;] __lock_page+0x67/0x70
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81096de0&amp;gt;] ? wake_bit_function+0x0/0x50
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa1055cc1&amp;gt;] vvp_page_make_ready+0x271/0x280 [lustre]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0e89149&amp;gt;] cl_page_make_ready+0x89/0x370 [obdclass]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81055ad3&amp;gt;] ? __wake_up+0x53/0x70
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0ca069e&amp;gt;] osc_extent_make_ready+0x34e/0xc00 [osc]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0c9c113&amp;gt;] ? on_list+0x43/0x50 [osc]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0c9c1cd&amp;gt;] ? __osc_list_maint+0xad/0x150 [osc]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81055ad3&amp;gt;] ? __wake_up+0x53/0x70
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0ca4826&amp;gt;] osc_io_unplug0+0x15e6/0x1f00 [osc]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0c9e1ad&amp;gt;] ? osc_extent_tree_dump0+0x28d/0xb20 [osc]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0ca6ec1&amp;gt;] osc_io_unplug+0x11/0x20 [osc]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0ca709d&amp;gt;] osc_queue_sync_pages+0x1cd/0x350 [osc]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0c985d7&amp;gt;] osc_io_submit+0x1c7/0x4b0 [osc]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0e92b3e&amp;gt;] cl_io_submit_rw+0x6e/0x160 [obdclass]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0d2c360&amp;gt;] lov_io_submit+0x2d0/0x4b0 [lov]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0d0f874&amp;gt;] ? lov_merge_lvb_kms+0x124/0x530 [lov]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0e92b3e&amp;gt;] cl_io_submit_rw+0x6e/0x160 [obdclass]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0e95ae7&amp;gt;] cl_io_submit_sync+0x87/0x1b0 [obdclass]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa1026adc&amp;gt;] ll_page_sync_io+0x5c/0x110 [lustre]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0d2009e&amp;gt;] ? lov_attr_get+0x1e/0x60 [lov]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa1045561&amp;gt;] ll_write_begin+0x5c1/0x760 [lustre]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff8111a7b3&amp;gt;] generic_file_buffered_write+0x123/0x2e0
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81075887&amp;gt;] ? current_fs_time+0x27/0x30
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff8111c210&amp;gt;] __generic_file_aio_write+0x260/0x490
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff8111c4c8&amp;gt;] generic_file_aio_write+0x88/0x100
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa105907b&amp;gt;] vvp_io_write_start+0xdb/0x3d0 [lustre]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0e92dca&amp;gt;] cl_io_start+0x6a/0x140 [obdclass]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0e96f54&amp;gt;] cl_io_loop+0xb4/0x1b0 [obdclass]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0ff9f36&amp;gt;] ll_file_io_generic+0x2b6/0x710 [lustre]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0e86d09&amp;gt;] ? cl_env_get+0x29/0x350 [obdclass]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0ffa4d2&amp;gt;] ll_file_aio_write+0x142/0x2c0 [lustre]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffffa0ffad6c&amp;gt;] ll_file_write+0x16c/0x2a0 [lustre]
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81181398&amp;gt;] vfs_write+0xb8/0x1a0
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff81181c91&amp;gt;] sys_write+0x51/0x90
Apr  8 01:27:04 r21 kernel: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
Apr  8 01:29:04 r21 kernel: INFO: task pre:46376 blocked for more than 120 seconds.
Apr  8 01:29:04 r21 kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Apr  8 01:29:04 r21 kernel: pre           D 000000000000000c     0 46376  46345 0x00000004
Apr  8 01:29:04 r21 kernel: ffff8807f19f7388 0000000000000086 ffff8807f19f72f8 ffff880b97c9b500
Apr  8 01:29:04 r21 kernel: ffff88085c456700 0000000000000003 ffff8807f19f7328 ffffffff81055f96
Apr  8 01:29:04 r21 kernel: ffff880560820638 ffff8807f19f7fd8 000000000000fb88 ffff880560820638
Apr  8 01:29:04 r21 kernel: Call Trace:
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="24121">LU-4873</key>
            <summary>Lustre client hangs in vvp_page_make_ready</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="niu">Niu Yawei</assignee>
                                    <reporter username="ihara">Shuichi Ihara</reporter>
                        <labels>
                    </labels>
                <created>Tue, 8 Apr 2014 21:02:57 +0000</created>
                <updated>Tue, 29 Apr 2014 17:25:45 +0000</updated>
                            <resolved>Thu, 17 Apr 2014 16:06:24 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="81249" author="pjones" created="Wed, 9 Apr 2014 05:51:44 +0000"  >&lt;p&gt;Niu&lt;/p&gt;

&lt;p&gt;Could you please comment on this issues?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="81253" author="niu" created="Wed, 9 Apr 2014 08:28:10 +0000"  >&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        &lt;span class=&quot;code-comment&quot;&gt;/* To avoid deadlock, &lt;span class=&quot;code-keyword&quot;&gt;try&lt;/span&gt; to lock page first. */&lt;/span&gt;
        vmpage = grab_cache_page_nowait(mapping, index);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (unlikely(vmpage == NULL ||
                     PageDirty(vmpage) || PageWriteback(vmpage))) {
                struct ccc_io *cio = ccc_env_io(env);
                struct cl_page_list *plist = &amp;amp;cio-&amp;gt;u.write.cui_queue;

                /* &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; the page is already in dirty cache, we have to commit
                 * the pages right now; otherwise, it may cause deadlock
                 * because it holds page lock of a dirty page and request &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt;
                 * more grants. It&apos;s okay &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; the dirty page to be the first
                 * one in commit page list, though. */
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (vmpage != NULL &amp;amp;&amp;amp; plist-&amp;gt;pl_nr &amp;gt; 0) {
                        unlock_page(vmpage);
                        page_cache_release(vmpage);
                        vmpage = NULL;
                }

                &lt;span class=&quot;code-comment&quot;&gt;/* commit pages and then wait &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; page lock */&lt;/span&gt;
                result = vvp_io_write_commit(env, io);
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (result &amp;lt; 0)
                        GOTO(out, result);

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Look into the ll_write_begin(), if the page is in writeback and cui_queue-&amp;gt;pl_nr == 0, it could call vvp_io_write_commit() with page locked? Xiong, is it possible? please take a look, thanks.&lt;/p&gt;</comment>
                            <comment id="81376" author="jay" created="Thu, 10 Apr 2014 15:15:34 +0000"  >&lt;p&gt;If plist-&amp;gt;nr is zero, then there is no page in pending, so vvp_io_write_commit() is a null op essentially.&lt;/p&gt;</comment>
                            <comment id="81377" author="jay" created="Thu, 10 Apr 2014 15:39:58 +0000"  >&lt;p&gt;the root cause of this issue is as follows:&lt;br/&gt;
1. pages with indices 0,...,254 have already been dirtied and stayed in OSC cache;&lt;br/&gt;
2. thread T tries to write pages from 254 to 255, and the last page(page 255) is a partial page;&lt;br/&gt;
3. T will have to issue a read RPC due to partial page write, with page 254 locked;&lt;br/&gt;
4. T composes a READ RPC and add it into the pending list;&lt;br/&gt;
5. osc_io_unplug() will be called by T to send RPCs;&lt;br/&gt;
6. the osc_extent that contains page &lt;span class=&quot;error&quot;&gt;&amp;#91;0,...254&amp;#93;&lt;/span&gt; is also ready to send;&lt;br/&gt;
7. osc_io_unplug() sends WRITE RPC first, so it will try to compose a WRITE RPC for the above extent, however, the page 254 is already locked by T.&lt;/p&gt;

&lt;p&gt;Deadlocked.&lt;/p&gt;

&lt;p&gt;I will create a patch soon.&lt;/p&gt;
</comment>
                            <comment id="81378" author="jay" created="Thu, 10 Apr 2014 15:53:27 +0000"  >&lt;p&gt;can you please try patch: &lt;a href=&quot;http://review.whamcloud.com/9928?&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9928?&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="81395" author="ihara" created="Thu, 10 Apr 2014 19:47:59 +0000"  >&lt;p&gt;I just tested patch and didn&apos;t reproduce same issue so far. Originally, this problem 100% didn&apos;t happen, so, I will keep on testing, but it seems patch can solve this problem.&lt;/p&gt;</comment>
                            <comment id="81430" author="ihara" created="Fri, 11 Apr 2014 15:45:54 +0000"  >&lt;p&gt;more tested, but nothing reproduced issue so far. the patch solved problem. Thanks!&lt;/p&gt;</comment>
                            <comment id="81842" author="jay" created="Thu, 17 Apr 2014 16:06:24 +0000"  >&lt;p&gt;patch landed.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="24487">LU-4977</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="14676" name="messages" size="231676" author="ihara" created="Tue, 8 Apr 2014 21:02:57 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwjlb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>13466</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>