<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:25:22 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-9344] sanity test_244: sendfile_grouplock test12() test hung</title>
                <link>https://jira.whamcloud.com/browse/LU-9344</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for bobijam &amp;lt;bobijam.xu@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/67af86be-2027-11e7-9073-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/67af86be-2027-11e7-9073-5254006e85c2&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_244 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;test failed to respond and timed out
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Info required for matching: sanity 244&lt;/p&gt;


&lt;p&gt;sendfile_grouplock.c calls sendfile_copy(sourfile, 0, destfile, 98765)&lt;br/&gt;
and sendfile_copy()&amp;#45;&amp;gt;llapi_group_lock(fd_out, dest_gid);&lt;/p&gt;

&lt;p&gt;which will call into lov_io_init() and atomic_inc(&amp;amp;lov-&amp;gt;lo_active_ios)&lt;/p&gt;

&lt;p&gt;and sendfile_copy() tries to write to the file, which will check to get the layout, and ll_layout_refresh() finds there is an active IO (marked by ll_get_grouplock()), so the write hangs there&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sendfile_grou S 0000000000000000     0  7394   7321 0x00000080
 ffff88000eb3f618 0000000000000082 ffff88000eb3f5e0 ffff88000eb3f5dc
 00001ce200000000 ffff88003f828400 0000005dce083b5f ffff880003436ac0
 00000000000005ff 0000000100017a1d ffff88002b57fad0 ffff88000eb3ffd8
Call Trace:
 [&amp;lt;ffffffffa0afa20b&amp;gt;] lov_layout_wait+0x11b/0x220 [lov]
 [&amp;lt;ffffffff810640e0&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa0afc11e&amp;gt;] lov_conf_set+0x37e/0xa30 [lov]
 [&amp;lt;ffffffffa040f471&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffffa059d888&amp;gt;] cl_conf_set+0x58/0x100 [obdclass]
 [&amp;lt;ffffffffa0fa5dd4&amp;gt;] ll_layout_conf+0x84/0x3f0 [lustre]
 [&amp;lt;ffffffffa040f471&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffffa0fb0b9d&amp;gt;] ll_layout_refresh+0x96d/0x1710 [lustre]
 [&amp;lt;ffffffffa040f471&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffffa0ff7d6f&amp;gt;] vvp_io_init+0x32f/0x450 [lustre]
 [&amp;lt;ffffffffa040f471&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
 [&amp;lt;ffffffffa05a5148&amp;gt;] cl_io_init0+0x88/0x150 [obdclass]
 [&amp;lt;ffffffffa05a7caa&amp;gt;] cl_io_init+0x4a/0xa0 [obdclass]
 [&amp;lt;ffffffffa05a7dbc&amp;gt;] cl_io_rw_init+0xbc/0x200 [obdclass]
 [&amp;lt;ffffffffa0fa7213&amp;gt;] ll_file_io_generic+0x203/0xaf0 [lustre]
 [&amp;lt;ffffffffa0fa941d&amp;gt;] ll_file_aio_write+0x13d/0x280 [lustre]
 [&amp;lt;ffffffffa0fa969a&amp;gt;] ll_file_write+0x13a/0x270 [lustre]
 [&amp;lt;ffffffff81189ef8&amp;gt;] vfs_write+0xb8/0x1a0
 [&amp;lt;ffffffff811ba76d&amp;gt;] kernel_write+0x3d/0x50
 [&amp;lt;ffffffff811ba7da&amp;gt;] write_pipe_buf+0x5a/0x90
 [&amp;lt;ffffffff811b9342&amp;gt;] splice_from_pipe_feed+0x72/0x120
 [&amp;lt;ffffffff811ba780&amp;gt;] ? write_pipe_buf+0x0/0x90
 [&amp;lt;ffffffff811ba780&amp;gt;] ? write_pipe_buf+0x0/0x90
 [&amp;lt;ffffffff811b9d9e&amp;gt;] __splice_from_pipe+0x6e/0x80
 [&amp;lt;ffffffff811ba780&amp;gt;] ? write_pipe_buf+0x0/0x90
 [&amp;lt;ffffffff811b9e01&amp;gt;] splice_from_pipe+0x51/0x70
 [&amp;lt;ffffffff811b9e3d&amp;gt;] default_file_splice_write+0x1d/0x30
 [&amp;lt;ffffffff811b9fca&amp;gt;] do_splice_from+0xba/0xf0
 [&amp;lt;ffffffff811ba020&amp;gt;] direct_splice_actor+0x20/0x30
 [&amp;lt;ffffffff811ba256&amp;gt;] splice_direct_to_actor+0xc6/0x1c0
 [&amp;lt;ffffffff811ba000&amp;gt;] ? direct_splice_actor+0x0/0x30
 [&amp;lt;ffffffff811ba39d&amp;gt;] do_splice_direct+0x4d/0x60
 [&amp;lt;ffffffff8118a344&amp;gt;] do_sendfile+0x184/0x1e0
 [&amp;lt;ffffffff8118a3d4&amp;gt;] sys_sendfile64+0x34/0xb0
 [&amp;lt;ffffffff810e031e&amp;gt;] ? __audit_syscall_exit+0x25e/0x290
 [&amp;lt;ffffffff8100b0d2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="45506">LU-9344</key>
            <summary>sanity test_244: sendfile_grouplock test12() test hung</summary>
                <type id="7" iconUrl="https://jira.whamcloud.com/images/icons/issuetypes/task_agile.png">Technical task</type>
                            <parent id="45517">LU-9349</parent>
                                    <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>pfl</label>
                    </labels>
                <created>Fri, 14 Apr 2017 08:58:02 +0000</created>
                <updated>Mon, 10 Jul 2017 20:08:35 +0000</updated>
                            <resolved>Fri, 28 Apr 2017 22:21:12 +0000</resolved>
                                    <version>Lustre 2.10.0</version>
                                    <fixVersion>Lustre 2.10.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="192133" author="bobijam" created="Fri, 14 Apr 2017 19:47:18 +0000"  >&lt;p&gt;Jinshan,&lt;/p&gt;

&lt;p&gt;I think sendfile_grouplock.c does not use the group lock correctly. It holds a group lock while trying to write data to the file.&lt;/p&gt;</comment>
                            <comment id="192173" author="gerrit" created="Sat, 15 Apr 2017 04:28:28 +0000"  >&lt;p&gt;Bobi Jam (bobijam@hotmail.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/26646&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26646&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9344&quot; title=&quot;sanity test_244: sendfile_grouplock test12() test hung&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9344&quot;&gt;&lt;del&gt;LU-9344&lt;/del&gt;&lt;/a&gt; test: hung with test12()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 92460697205d25e2de08f4c6d05e3dc8d8bc3387&lt;/p&gt;</comment>
                            <comment id="192207" author="jay" created="Mon, 17 Apr 2017 06:32:44 +0000"  >&lt;p&gt;Bobijam and I discussed this problem a little bit. Group lock needs to acquire locks from all objects in the current layout, so that it has to increase active_ios in the LOV layer, therefore this layout won&apos;t disappear during the existence of group lock.&lt;/p&gt;

&lt;p&gt;When a write extends a PFL layout with the group lock held, it will result in a deadlock, because configuring the new layout lock needs to wait for the active IOs to reach zero.&lt;/p&gt;

&lt;p&gt;The current workaround solution is to instantiate all components before group lock is taken.&lt;/p&gt;</comment>
                            <comment id="193105" author="adilger" created="Sat, 22 Apr 2017 06:38:33 +0000"  >&lt;p&gt;This will hurt all file migration operations, since it will instantiate all layout components on both the source and target files.  That wouldn&apos;t be so bad if it only instantiated the components on the source, but that doesn&apos;t really make sense to instantiate the components when getting the group lock on any file that is opened read-only.&lt;/p&gt;

&lt;p&gt;That said, I&apos;m wondering if there is even a race when getting the group lock on the new objects?  Since the client(s) writing to the file are already holding the group lock on the objects on the first part of the file, any other clients would be blocked from accessing the file if they are enqueuing the group locks in component order.  The existing group lock holders could still group lock the newly allocated objects without dropping the locks on the existing objects (which &lt;em&gt;would&lt;/em&gt; cause a deadlock).&lt;/p&gt;</comment>
                            <comment id="193141" author="jay" created="Sun, 23 Apr 2017 04:57:12 +0000"  >&lt;p&gt;For migration, there is another option to use Lustre file lease. But really good point on acquiring group lock when a file is opened for read only.&lt;/p&gt;

&lt;p&gt;It seems like it&apos;s hard to maintain the current semantics of the group lock. Can we revise the semantics of the group lock? For example, the group lock will fail if the file&apos;s layout is changed.&lt;/p&gt;</comment>
                            <comment id="193948" author="gerrit" created="Fri, 28 Apr 2017 20:36:26 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/26646/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26646/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9344&quot; title=&quot;sanity test_244: sendfile_grouplock test12() test hung&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9344&quot;&gt;&lt;del&gt;LU-9344&lt;/del&gt;&lt;/a&gt; test: hung with sendfile_grouplock test12()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: c6b5df7644c245853b5dcf82b1c93614c5357f3f&lt;/p&gt;</comment>
                            <comment id="193969" author="pjones" created="Fri, 28 Apr 2017 22:21:12 +0000"  >&lt;p&gt;Landed for 2.10&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="42916">LU-8998</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="45812">LU-9429</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="45980">LU-9479</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="47174">LU-9756</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzza9b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>