<?xml version="1.0" encoding="UTF-8"?>
<!--
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:05:16 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7016] deadlock on reading multiple files from HSM archive</title>
                <link>https://jira.whamcloud.com/browse/LU-7016</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When it reads multiple files from HSM archive using posix copytool, it causes deadlock on the client.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Aug 18 07:46:58 r38 kernel: INFO: task lhsmtool_posix:16414 blocked for more than 120 seconds.
Aug 18 07:46:58 r38 kernel:      Not tainted 2.6.32-431.29.2.el6.x86_64 #1
Aug 18 07:46:58 r38 kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Aug 18 07:46:58 r38 kernel: lhsmtool_posi D 0000000000000002     0 16414  14111 0x00000000
Aug 18 07:46:58 r38 kernel: ffff880f6379bd18 0000000000000082 ffff88102609502a ffff881020055c40
Aug 18 07:46:58 r38 kernel: ffff880f6379bc88 ffffffff81227e9f ffff880f6379bd68 ffffffff81199045
Aug 18 07:46:59 r38 kernel: ffff880f67361af8 ffff880f6379bfd8 000000000000fbc8 ffff880f67361af8
Aug 18 07:46:59 r38 kernel: Call Trace:
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81227e9f&amp;gt;] ? security_inode_permission+0x1f/0x30
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81199045&amp;gt;] ? __link_path_walk+0x145/0x1000
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8152a5be&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8152a45b&amp;gt;] mutex_lock+0x2b/0x50
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8119ba76&amp;gt;] do_filp_open+0x2d6/0xd20
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffffa0605343&amp;gt;] ? ll_file_release+0x683/0xac0 [lustre]
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81153ea1&amp;gt;] ? unlink_anon_vmas+0x71/0xd0
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8128f83a&amp;gt;] ? strncpy_from_user+0x4a/0x90
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff811a8b82&amp;gt;] ? alloc_fd+0x92/0x160
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81185be9&amp;gt;] do_sys_open+0x69/0x140
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81185d00&amp;gt;] sys_open+0x20/0x30
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
Aug 18 07:46:59 r38 kernel: INFO: task lhsmtool_posix:16415 blocked for more than 120 seconds.
Aug 18 07:46:59 r38 kernel:      Not tainted 2.6.32-431.29.2.el6.x86_64 #1
Aug 18 07:46:59 r38 kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Aug 18 07:46:59 r38 kernel: lhsmtool_posi D 0000000000000004     0 16415  14111 0x00000000
Aug 18 07:46:59 r38 kernel: ffff880f634c1d18 0000000000000082 0000000000000000 ffff881020055c40
Aug 18 07:46:59 r38 kernel: ffff880f634c1c88 ffffffff81227e9f ffff880f634c1d68 ffffffff81199045
Aug 18 07:46:59 r38 kernel: ffff880f64e5a5f8 ffff880f634c1fd8 000000000000fbc8 ffff880f64e5a5f8
Aug 18 07:46:59 r38 kernel: Call Trace:
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81227e9f&amp;gt;] ? security_inode_permission+0x1f/0x30
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81199045&amp;gt;] ? __link_path_walk+0x145/0x1000
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8152a5be&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8152a45b&amp;gt;] mutex_lock+0x2b/0x50
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8119ba76&amp;gt;] do_filp_open+0x2d6/0xd20
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffffa0605343&amp;gt;] ? ll_file_release+0x683/0xac0 [lustre]
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8128f83a&amp;gt;] ? strncpy_from_user+0x4a/0x90
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff811a8b82&amp;gt;] ? alloc_fd+0x92/0x160
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81185be9&amp;gt;] do_sys_open+0x69/0x140
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81185d00&amp;gt;] sys_open+0x20/0x30
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
Aug 18 07:46:59 r38 kernel: INFO: task md5sum:16312 blocked for more than 120 seconds.
Aug 18 07:46:59 r38 kernel:      Not tainted 2.6.32-431.29.2.el6.x86_64 #1
Aug 18 07:46:59 r38 kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Aug 18 07:46:59 r38 kernel: md5sum        D 0000000000000001     0 16312  15803 0x00000000
Aug 18 07:46:59 r38 kernel: ffff881fbd611b58 0000000000000082 0000000000000000 0000000000000286
Aug 18 07:46:59 r38 kernel: ffff881fbd611b68 ffff881020055c40 ffff881fbd611b38 ffffffffa05eb3f0
Aug 18 07:46:59 r38 kernel: ffff882026a85ab8 ffff881fbd611fd8 000000000000fbc8 ffff882026a85ab8
Aug 18 07:46:59 r38 kernel: Call Trace:
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffffa05eb3f0&amp;gt;] ? ll_lookup_finish_locks+0x270/0x8d0 [lustre]
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8152a5be&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8152a45b&amp;gt;] mutex_lock+0x2b/0x50
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff811989ab&amp;gt;] do_lookup+0x11b/0x230
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff811996a4&amp;gt;] __link_path_walk+0x7a4/0x1000
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8114a3d7&amp;gt;] ? handle_pte_fault+0xf7/0xb00
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8104eeb7&amp;gt;] ? pte_alloc_one+0x37/0x50
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8119a1ba&amp;gt;] path_walk+0x6a/0xe0
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8119a3cb&amp;gt;] filename_lookup+0x6b/0xc0
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81226d56&amp;gt;] ? security_file_alloc+0x16/0x20
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8119b8a4&amp;gt;] do_filp_open+0x104/0xd20
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8114e820&amp;gt;] ? __vma_link_rb+0x30/0x40
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8114e8cb&amp;gt;] ? vma_link+0x9b/0xf0
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8128f83a&amp;gt;] ? strncpy_from_user+0x4a/0x90
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff811a8b82&amp;gt;] ? alloc_fd+0x92/0x160
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81185be9&amp;gt;] do_sys_open+0x69/0x140
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff81185d00&amp;gt;] sys_open+0x20/0x30
Aug 18 07:46:59 r38 kernel: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>master</environment>
        <key id="31510">LU-7016</key>
            <summary>deadlock on reading multiple files from HSM archive</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="ihara">Shuichi Ihara</reporter>
                        <labels>
                    </labels>
                <created>Mon, 17 Aug 2015 22:50:40 +0000</created>
                <updated>Wed, 13 Oct 2021 03:06:12 +0000</updated>
                            <resolved>Wed, 13 Oct 2021 03:06:12 +0000</resolved>
                                    <version>Lustre 2.8.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="124365" author="ihara" created="Mon, 17 Aug 2015 22:52:48 +0000"  >&lt;p&gt;this is the reproducer.&lt;br/&gt;
the problem always happens when reading (md5sum) files &lt;/p&gt;</comment>
                            <comment id="124366" author="ihara" created="Mon, 17 Aug 2015 22:53:43 +0000"  >&lt;p&gt;client&apos;s syslog&lt;/p&gt;</comment>
                            <comment id="124367" author="bfaccini" created="Mon, 17 Aug 2015 23:13:29 +0000"  >&lt;p&gt;Hello Ihara,&lt;br/&gt;
According to the syslog you have provided, and the hung threads stacks dumped in it, both your reproducer/script, and the md5sum commands, are running on the same node as the Posix CopyTool. So, the first question that comes to my mind is, does your copytool use the same Lustre mount-point as the other Lustre Client-side commands ??&lt;br/&gt;
And if yes, can you give a new try to your reproducer with the Posix CopyTool using a separate mount-point ?&lt;/p&gt;</comment>
                            <comment id="124369" author="ihara" created="Mon, 17 Aug 2015 23:21:29 +0000"  >&lt;p&gt;Yes, test script and copytool are running on same node. is that problem?&lt;br/&gt;
let me quickly run on separate nodes. Thanks for your advice.&lt;/p&gt;</comment>
                            <comment id="124370" author="bfaccini" created="Mon, 17 Aug 2015 23:32:21 +0000"  >&lt;p&gt;Thanks in advance, but you can also use a separate+specific Lustre mount-point for the copytool.&lt;/p&gt;</comment>
                            <comment id="124371" author="ihara" created="Tue, 18 Aug 2015 00:02:59 +0000"  >&lt;p&gt;OK, confirmed. At least, it works if we use separate nodes for copytool and test script.&lt;/p&gt;</comment>
                            <comment id="125601" author="jfc" created="Sat, 29 Aug 2015 00:48:54 +0000"  >&lt;p&gt;Any more work required on this ticket or can we mark it as resolved?&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
~ jfc.&lt;/p&gt;</comment>
                            <comment id="125655" author="bfaccini" created="Mon, 31 Aug 2015 08:04:36 +0000"  >&lt;p&gt;John, even if the dual/2nd specific mount-point work-around works, I still need to work with the reproducer provided and understand/fix the dead-lock condition.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="18630" name="messages" size="220882" author="ihara" created="Mon, 17 Aug 2015 22:53:43 +0000"/>
                            <attachment id="18629" name="mult_test.sh" size="1280" author="ihara" created="Mon, 17 Aug 2015 22:52:48 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 31 Aug 2015 22:50:40 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxknz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10021"><![CDATA[2]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 17 Aug 2015 22:50:40 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>