<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:13:42 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7992] HSM: mutual deadlock when killing a copytool</title>
                <link>https://jira.whamcloud.com/browse/LU-7992</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;If something tries to kill a copytool while it is performing some requests, it will deadlock. It won&apos;t ever finish, and remains unkillable even with -KILL. The Lustre client has to be rebooted to fix the situation.&lt;/p&gt;

&lt;p&gt;The Lustre kernel thread:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[62266.422298] LNet: Service thread pid 30493 was inactive for 62.18s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[62266.424940] Pid: 30493, comm: ldlm_cb03_001
[62266.426214] 
Call Trace:
[62266.428573]  [&amp;lt;ffffffff81661eb9&amp;gt;] schedule+0x29/0x70
[62266.429763]  [&amp;lt;ffffffff811f3266&amp;gt;] pipe_wait+0x76/0xd0
[62266.430928]  [&amp;lt;ffffffff810a8040&amp;gt;] ? autoremove_wake_function+0x0/0x40
[62266.432062]  [&amp;lt;ffffffff811f357f&amp;gt;] pipe_write+0x25f/0x600
[62266.433174]  [&amp;lt;ffffffff8101cd35&amp;gt;] ? native_sched_clock+0x35/0x80
[62266.434270]  [&amp;lt;ffffffff8101cd89&amp;gt;] ? sched_clock+0x9/0x10
[62266.435351]  [&amp;lt;ffffffff81310823&amp;gt;] ? number.isra.2+0x323/0x360
[62266.436418]  [&amp;lt;ffffffff811e9d7d&amp;gt;] do_sync_write+0x8d/0xd0
[62266.437455]  [&amp;lt;ffffffff811ea5d0&amp;gt;] vfs_write+0xc0/0x1f0
[62266.438570]  [&amp;lt;ffffffffa07ba119&amp;gt;] libcfs_kkuc_msg_put+0xa9/0x250 [obdclass]
[62266.439598]  [&amp;lt;ffffffffa07ba33e&amp;gt;] libcfs_kkuc_group_put+0x7e/0x190 [obdclass]
[62266.440604]  [&amp;lt;ffffffffa0ba69bd&amp;gt;] mdc_set_info_async+0x7ad/0x820 [mdc]
[62266.441692]  [&amp;lt;ffffffffa09d1c81&amp;gt;] ldlm_callback_handler.part.24+0x1781/0x2140 [ptlrpc]
[62266.442721]  [&amp;lt;ffffffffa068d937&amp;gt;] ? libcfs_debug_msg+0x57/0x80 [libcfs]
[62266.443712]  [&amp;lt;ffffffffa09d267a&amp;gt;] ldlm_callback_handler+0x3a/0xd0 [ptlrpc]
[62266.444699]  [&amp;lt;ffffffffa09ff081&amp;gt;] ptlrpc_server_handle_request+0x231/0xad0 [ptlrpc]
[62266.445672]  [&amp;lt;ffffffffa09fcba5&amp;gt;] ? ptlrpc_wait_event+0xa5/0x360 [ptlrpc]
[62266.446599]  [&amp;lt;ffffffff810e5b7f&amp;gt;] ? lock_release_holdtime.part.24+0xf/0x180
[62266.447533]  [&amp;lt;ffffffffa0a032c0&amp;gt;] ptlrpc_main+0xab0/0x1e30 [ptlrpc]
[62266.448416]  [&amp;lt;ffffffff810b2ba4&amp;gt;] ? finish_task_switch+0x44/0x180
[62266.449313]  [&amp;lt;ffffffffa0a02810&amp;gt;] ? ptlrpc_main+0x0/0x1e30 [ptlrpc]
[62266.450138]  [&amp;lt;ffffffff810a6e6d&amp;gt;] kthread+0xed/0x100
[62266.450950]  [&amp;lt;ffffffff810a6d80&amp;gt;] ? kthread+0x0/0x100
[62266.451757]  [&amp;lt;ffffffff8166d4d8&amp;gt;] ret_from_fork+0x58/0x90
[62266.452539]  [&amp;lt;ffffffff810a6d80&amp;gt;] ? kthread+0x0/0x100
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The copy tool:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[62338.455850] INFO: task tas_cmm:25225 blocked for more than 120 seconds.
[62338.457167] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
[62338.458499] tas_cmm         D ffff88062d8ec650     0 25225      1 0x00000084
[62338.459736]  ffff8800a0463a78 0000000000000046 ffff88027685ddc0 ffff8800a0463fd8
[62338.460889]  ffff8800a0463fd8 ffff8800a0463fd8 ffff88027685ddc0 ffff88027685ddc0
[62338.462227]  ffffffffa0806640 ffffffffa0806648 ffffffff00000000 ffffffffa0806690
[62338.463510] Call Trace:
[62338.464693]  [&amp;lt;ffffffff81661eb9&amp;gt;] schedule+0x29/0x70
[62338.465447]  [&amp;lt;ffffffff81663555&amp;gt;] rwsem_down_write_failed+0x105/0x1c0
[62338.466116]  [&amp;lt;ffffffff81312400&amp;gt;] ? pointer.isra.19+0x190/0x4c0
[62338.466954]  [&amp;lt;ffffffff81314f43&amp;gt;] call_rwsem_down_write_failed+0x13/0x20
[62338.468232]  [&amp;lt;ffffffff81661135&amp;gt;] ? down_write+0x85/0x9c
[62338.469526]  [&amp;lt;ffffffffa07ba4c9&amp;gt;] ? libcfs_kkuc_group_rem+0x79/0x290 [obdclass]
[62338.470827]  [&amp;lt;ffffffffa07ba4c9&amp;gt;] libcfs_kkuc_group_rem+0x79/0x290 [obdclass]
[62338.471585]  [&amp;lt;ffffffffa094e895&amp;gt;] ? obd_iocontrol+0xd5/0x350 [lmv]
[62338.472276]  [&amp;lt;ffffffffa0954bbe&amp;gt;] lmv_iocontrol+0x78e/0x2720 [lmv]
[62338.472973]  [&amp;lt;ffffffff810bd815&amp;gt;] ? local_clock+0x25/0x30
[62338.473657]  [&amp;lt;ffffffff810e78cd&amp;gt;] ? __lock_acquire.isra.31+0x2ad/0xba0
[62338.474318]  [&amp;lt;ffffffff8101cd35&amp;gt;] ? native_sched_clock+0x35/0x80
[62338.475000]  [&amp;lt;ffffffff8101cd89&amp;gt;] ? sched_clock+0x9/0x10
[62338.475678]  [&amp;lt;ffffffff810e78cd&amp;gt;] ? __lock_acquire.isra.31+0x2ad/0xba0
[62338.476329]  [&amp;lt;ffffffff811cd743&amp;gt;] ? kmem_cache_alloc_trace+0x1b3/0x1d0
[62338.477018]  [&amp;lt;ffffffffa118b7cf&amp;gt;] ? copy_and_ioctl.constprop.22+0x2f/0x4f0 [lustre]
[62338.477714]  [&amp;lt;ffffffffa118b98b&amp;gt;] copy_and_ioctl.constprop.22+0x1eb/0x4f0 [lustre]
[62338.478393]  [&amp;lt;ffffffffa1191a88&amp;gt;] ll_dir_ioctl+0x2438/0x6420 [lustre]
[62338.479074]  [&amp;lt;ffffffff81657e30&amp;gt;] ? __slab_free+0x10e/0x277
[62338.479771]  [&amp;lt;ffffffff8101cd35&amp;gt;] ? native_sched_clock+0x35/0x80
[62338.480410]  [&amp;lt;ffffffff8101cd89&amp;gt;] ? sched_clock+0x9/0x10
[62338.481077]  [&amp;lt;ffffffff810bd815&amp;gt;] ? local_clock+0x25/0x30
[62338.481736]  [&amp;lt;ffffffff810e78cd&amp;gt;] ? __lock_acquire.isra.31+0x2ad/0xba0
[62338.482361]  [&amp;lt;ffffffff8101cd35&amp;gt;] ? native_sched_clock+0x35/0x80
[62338.483012]  [&amp;lt;ffffffff8101cd89&amp;gt;] ? sched_clock+0x9/0x10
[62338.483662]  [&amp;lt;ffffffff810bd815&amp;gt;] ? local_clock+0x25/0x30
[62338.484280]  [&amp;lt;ffffffff810e5b7f&amp;gt;] ? lock_release_holdtime.part.24+0xf/0x180
[62338.484939]  [&amp;lt;ffffffff810af97f&amp;gt;] ? lg_local_unlock+0x1f/0x60
[62338.485565]  [&amp;lt;ffffffff8120b98e&amp;gt;] ? mntput_no_expire+0x3e/0x120
[62338.486220]  [&amp;lt;ffffffff811fe795&amp;gt;] do_vfs_ioctl+0x2e5/0x4d0
[62338.486863]  [&amp;lt;ffffffff811fea21&amp;gt;] SyS_ioctl+0xa1/0xc0
[62338.487451]  [&amp;lt;ffffffff8166d589&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Lock debug reports:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[62362.788685] 1 lock held by ldlm_cb03_001/30493:
[62362.789459]  #0:  (kg_sem){......}, at: [&amp;lt;ffffffffa07ba307&amp;gt;] libcfs_kkuc_group_put+0x47/0x190 [obdclass]
[62362.800051] 1 lock held by tas_cmm/25225:
[62362.800886]  #0:  (kg_sem){......}, at: [&amp;lt;ffffffffa07ba4c9&amp;gt;] libcfs_kkuc_group_rem+0x79/0x290 [obdclass]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It appears that ldlm_cb03_001 blocks in pipe_wait, while holding the lock that the copytool needs to exit. The copytool can&apos;t read the message because it is finishing and waiting for the lock that ldlm_cb03_001 has.&lt;/p&gt;

</description>
                <environment></environment>
        <key id="35907">LU-7992</key>
            <summary>HSM: mutual deadlock when killing a copytool</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="jhammond">John Hammond</assignee>
                                    <reporter username="fzago">Frank Zago</reporter>
                        <labels>
                    </labels>
                <created>Wed, 6 Apr 2016 16:48:53 +0000</created>
                <updated>Wed, 29 Mar 2017 13:16:42 +0000</updated>
                            <resolved>Thu, 21 Apr 2016 20:44:14 +0000</resolved>
                                    <version>Lustre 2.5.3</version>
                    <version>Lustre 2.8.0</version>
                                    <fixVersion>Lustre 2.9.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="148007" author="fzago" created="Wed, 6 Apr 2016 16:49:38 +0000"  >&lt;p&gt;This is easily reproducible. Create a bunch of files, start the copytool then kill it.&lt;/p&gt;</comment>
                            <comment id="148147" author="jhammond" created="Thu, 7 Apr 2016 16:46:25 +0000"  >&lt;p&gt;I have a patch for this which I&apos;ll push after LUG.&lt;/p&gt;</comment>
                            <comment id="148155" author="fzago" created="Thu, 7 Apr 2016 17:09:01 +0000"  >&lt;p&gt;You can fix bugs in less time than it takes me to write the report &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;</comment>
                            <comment id="148389" author="gerrit" created="Mon, 11 Apr 2016 14:35:36 +0000"  >&lt;p&gt;John L. Hammond (john.hammond@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19442&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19442&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7992&quot; title=&quot;HSM: mutual deadlock when killing a copytool&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7992&quot;&gt;&lt;del&gt;LU-7992&lt;/del&gt;&lt;/a&gt; hsm: close KUC pipe before unregistering&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 0b56e0ce0deff5097c75d4fc42248f4fee714e64&lt;/p&gt;</comment>
                            <comment id="148988" author="gerrit" created="Thu, 14 Apr 2016 19:45:06 +0000"  >&lt;p&gt;Frank Zago (fzago@cray.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/19576&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19576&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7992&quot; title=&quot;HSM: mutual deadlock when killing a copytool&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7992&quot;&gt;&lt;del&gt;LU-7992&lt;/del&gt;&lt;/a&gt; hsm: close KUC pipe before unregistering&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 0a6bc9601906e4d48592a43d33b919d5f30f09ad&lt;/p&gt;</comment>
                            <comment id="149631" author="gerrit" created="Thu, 21 Apr 2016 02:28:24 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/19442/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/19442/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7992&quot; title=&quot;HSM: mutual deadlock when killing a copytool&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7992&quot;&gt;&lt;del&gt;LU-7992&lt;/del&gt;&lt;/a&gt; hsm: close KUC pipe before unregistering&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 62752bf3b143630560adfdb568606a661fdafafb&lt;/p&gt;</comment>
                            <comment id="149754" author="jgmitter" created="Thu, 21 Apr 2016 20:44:14 +0000"  >&lt;p&gt;Landed to master for 2.9.0&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzy6zz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>