<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:04:18 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6904] linkea prepare in mdt_reint_rename cause deadlock in 24 hours failover</title>
                <link>https://jira.whamcloud.com/browse/LU-6904</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;During the 24-hour failover test, the MDT got stuck at:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LNet: Service thread pid 3156 was inactive for 200.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
Pid: 3156, comm: mdt01_005

Call Trace:
 [&amp;lt;ffffffff81083e1c&amp;gt;] ? lock_timer_base+0x3c/0x70
 [&amp;lt;ffffffff8152a512&amp;gt;] schedule_timeout+0x192/0x2e0
 [&amp;lt;ffffffff81083f30&amp;gt;] ? process_timeout+0x0/0x10
 [&amp;lt;ffffffffa07d9fa9&amp;gt;] ptlrpc_set_wait+0x319/0xa30 [ptlrpc]
 [&amp;lt;ffffffffa07cf520&amp;gt;] ? ptlrpc_interrupted_set+0x0/0x110 [ptlrpc]
 [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa07e61b5&amp;gt;] ? lustre_msg_set_jobid+0xf5/0x130 [ptlrpc]
 [&amp;lt;ffffffffa07da741&amp;gt;] ptlrpc_queue_wait+0x81/0x220 [ptlrpc]
 [&amp;lt;ffffffffa113f271&amp;gt;] osp_remote_sync+0x121/0x190 [osp]
 [&amp;lt;ffffffffa1125b47&amp;gt;] osp_xattr_get+0x737/0x10e0 [osp]
 [&amp;lt;ffffffffa0d2c378&amp;gt;] ? __ldiskfs_journal_stop+0x68/0xa0 [ldiskfs]
 [&amp;lt;ffffffffa104ab25&amp;gt;] lod_xattr_get+0x185/0x760 [lod]
 [&amp;lt;ffffffffa10b8825&amp;gt;] mdd_links_read+0xf5/0x2d0 [mdd]
 [&amp;lt;ffffffffa10bbc4f&amp;gt;] mdd_linkea_prepare+0x39f/0x4d0 [mdd]
 [&amp;lt;ffffffffa10bbea1&amp;gt;] mdd_links_rename+0x121/0x540 [mdd]
 [&amp;lt;ffffffff8152b3a6&amp;gt;] ? down_write+0x16/0x40
 [&amp;lt;ffffffffa10c28ba&amp;gt;] mdd_rename+0x13aa/0x1d50 [mdd]
 [&amp;lt;ffffffffa0f7a105&amp;gt;] mdt_reint_rename_internal+0x1305/0x1a50 [mdt]
 [&amp;lt;ffffffffa0798246&amp;gt;] ? ldlm_lock_enqueue+0x2c6/0x8e0 [ptlrpc]
 [&amp;lt;ffffffffa0f7aa4d&amp;gt;] mdt_reint_rename_or_migrate+0x1fd/0x7e0 [mdt]
 [&amp;lt;ffffffffa07b7160&amp;gt;] ? ldlm_blocking_ast+0x0/0x180 [ptlrpc]
 [&amp;lt;ffffffffa07b8ad0&amp;gt;] ? ldlm_completion_ast+0x0/0x9b0 [ptlrpc]
 [&amp;lt;ffffffffa080cea2&amp;gt;] ? __req_capsule_get+0x162/0x6e0 [ptlrpc]
 [&amp;lt;ffffffff8128b45a&amp;gt;] ? strlcpy+0x4a/0x60
 [&amp;lt;ffffffffa0f7b063&amp;gt;] mdt_reint_rename+0x13/0x20 [mdt]
 [&amp;lt;ffffffffa0f7386d&amp;gt;] mdt_reint_rec+0x5d/0x200 [mdt]
 [&amp;lt;ffffffffa0f5f78b&amp;gt;] mdt_reint_internal+0x62b/0xb80 [mdt]
 [&amp;lt;ffffffffa0f6017b&amp;gt;] mdt_reint+0x6b/0x120 [mdt]
 [&amp;lt;ffffffffa084ef42&amp;gt;] tgt_request_handle+0xa42/0x1230 [ptlrpc]
 [&amp;lt;ffffffffa07f73a1&amp;gt;] ptlrpc_main+0xe41/0x1920 [ptlrpc]
 [&amp;lt;ffffffff810096f0&amp;gt;] ? __switch_to+0xd0/0x320
 [&amp;lt;ffffffff810623a9&amp;gt;] ? find_busiest_queue+0x69/0x150
 [&amp;lt;ffffffff815296ee&amp;gt;] ? thread_return+0x4e/0x770
 [&amp;lt;ffffffffa07f6560&amp;gt;] ? ptlrpc_main+0x0/0x1920 [ptlrpc]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The thread is stuck here because the remote MDT restarted. However, it also holds the journal handle, which causes a lot of journal threads to get stuck:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;INFO: task jbd2/sdf1-8:2813 blocked for more than 120 seconds.
      Not tainted 2.6.32-431.29.2.el6_lustre.g2382eb0.x86_64 #1
&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
jbd2/sdf1-8   D 0000000000000001     0  2813      2 0x00000080
 ffff880fe9511d20 0000000000000046 0000000000000000 0000000000016880
 ffff880fe9511cd0 ffffffff810534d7 0000000000016880 ffff88102e42c080
 ffff88102e42c638 ffff880fe9511fd8 000000000000fbc8 ffff88102e42c638
Call Trace:
 [&amp;lt;ffffffff810534d7&amp;gt;] ? walk_tg_tree_from+0x67/0xc0
 [&amp;lt;ffffffffa0cd580f&amp;gt;] jbd2_journal_commit_transaction+0x19f/0x1500 [jbd2]
 [&amp;lt;ffffffff810096f0&amp;gt;] ? __switch_to+0xd0/0x320
 [&amp;lt;ffffffff81083e1c&amp;gt;] ? lock_timer_base+0x3c/0x70
 [&amp;lt;ffffffff8109afa0&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa0cdba58&amp;gt;] kjournald2+0xb8/0x220 [jbd2]
 [&amp;lt;ffffffff8109afa0&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa0cdb9a0&amp;gt;] ? kjournald2+0x0/0x220 [jbd2]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20

INFO: task tdtd-0:2919 blocked for more than 120 seconds.
      Not tainted 2.6.32-431.29.2.el6_lustre.g2382eb0.x86_64 #1
&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
tdtd-0        D 0000000000000000     0  2919      2 0x00000080
 ffff880ffbe2dc20 0000000000000046 0000004cffffffff ffffffffa046b27b
 0000000055b31da1 ffff88103152a000 ffff880ffbe2dc40 ffffffffa0c816cf
 ffff880ffbe1b098 ffff880ffbe2dfd8 000000000000fbc8 ffff880ffbe1b098
Call Trace:
 [&amp;lt;ffffffffa046b27b&amp;gt;] ? cfs_set_ptldebug_header+0x2b/0xc0 [libcfs]
 [&amp;lt;ffffffffa0c816cf&amp;gt;] ? qsd_op_begin+0x5f/0xb70 [lquota]
 [&amp;lt;ffffffff8109b2ce&amp;gt;] ? prepare_to_wait+0x4e/0x80
 [&amp;lt;ffffffffa0cd408a&amp;gt;] start_this_handle+0x25a/0x480 [jbd2]
 [&amp;lt;ffffffff8109afa0&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa0cd4495&amp;gt;] jbd2_journal_start+0xb5/0x100 [jbd2]
 [&amp;lt;ffffffffa0d2c406&amp;gt;] ldiskfs_journal_start_sb+0x56/0xe0 [ldiskfs]
 [&amp;lt;ffffffffa0d7dbef&amp;gt;] osd_trans_start+0x1df/0x410 [osd_ldiskfs]
 [&amp;lt;ffffffffa0860d6e&amp;gt;] distribute_txn_commit_thread+0xa1e/0x1700 [ptlrpc]
 [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa0860350&amp;gt;] ? distribute_txn_commit_thread+0x0/0x1700 [ptlrpc]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
INFO: task mdt_out03_003:3164 blocked for more than 120 seconds.
      Not tainted 2.6.32-431.29.2.el6_lustre.g2382eb0.x86_64 #1
&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
mdt_out03_003 D 000000000000000f     0  3164      2 0x00000080
 ffff88081a6ada60 0000000000000046 0000000000000000 ffff881029e64000
 ffff88081a6ada20 ffffffffa0db3e79 ffff88081a6ada90 ffffffff815296ee
 ffff880831563058 ffff88081a6adfd8 000000000000fbc8 ffff880831563058
Call Trace:
 [&amp;lt;ffffffffa0db3e79&amp;gt;] ? osd_declare_qid+0x289/0x480 [osd_ldiskfs]
 [&amp;lt;ffffffff815296ee&amp;gt;] ? thread_return+0x4e/0x770
 [&amp;lt;ffffffffa0cd408a&amp;gt;] start_this_handle+0x25a/0x480 [jbd2]
 [&amp;lt;ffffffff8109afa0&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa0cd4495&amp;gt;] jbd2_journal_start+0xb5/0x100 [jbd2]
 [&amp;lt;ffffffffa0d2c406&amp;gt;] ldiskfs_journal_start_sb+0x56/0xe0 [ldiskfs]
 [&amp;lt;ffffffffa0d7dbef&amp;gt;] osd_trans_start+0x1df/0x410 [osd_ldiskfs]
 [&amp;lt;ffffffffa0851ffc&amp;gt;] out_tx_end+0x9c/0x5d0 [ptlrpc]
 [&amp;lt;ffffffffa0856bab&amp;gt;] out_handle+0xd9b/0x17e0 [ptlrpc]
 [&amp;lt;ffffffffa079ffb0&amp;gt;] ? target_bulk_timeout+0x0/0xc0 [ptlrpc]
 [&amp;lt;ffffffffa084ef42&amp;gt;] tgt_request_handle+0xa42/0x1230 [ptlrpc]
 [&amp;lt;ffffffffa07f73a1&amp;gt;] ptlrpc_main+0xe41/0x1920 [ptlrpc]
 [&amp;lt;ffffffff81069290&amp;gt;] ? pick_next_task_fair+0xd0/0x130
 [&amp;lt;ffffffff81529466&amp;gt;] ? schedule+0x176/0x3b0
 [&amp;lt;ffffffffa07f6560&amp;gt;] ? ptlrpc_main+0x0/0x1920 [ptlrpc]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So in rename, the linkEA should be prepared before the journal is started, to avoid holding a transaction open while sending an RPC.&lt;/p&gt;</description>
                <environment></environment>
        <key id="31219">LU-6904</key>
            <summary>linkea prepare in mdt_reint_rename cause deadlock in 24 hours failover</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="di.wang">Di Wang</assignee>
                                    <reporter username="di.wang">Di Wang</reporter>
                        <labels>
                    </labels>
                <created>Sun, 26 Jul 2015 00:42:28 +0000</created>
                <updated>Wed, 26 Aug 2015 07:47:10 +0000</updated>
                            <resolved>Mon, 3 Aug 2015 03:57:20 +0000</resolved>
                                    <version>Lustre 2.8.0</version>
                                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="122221" author="gerrit" created="Sun, 26 Jul 2015 01:07:11 +0000"  >&lt;p&gt;wangdi (di.wang@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/15724&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15724&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6904&quot; title=&quot;linkea prepare in mdt_reint_rename cause deadlock in 24 hours failover&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6904&quot;&gt;&lt;del&gt;LU-6904&lt;/del&gt;&lt;/a&gt; mdd: prepare linkea before declare&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e62e161dfe66c10160f432d8a6bc75ff91d08003&lt;/p&gt;</comment>
                            <comment id="122973" author="gerrit" created="Mon, 3 Aug 2015 02:02:21 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/15724/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15724/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6904&quot; title=&quot;linkea prepare in mdt_reint_rename cause deadlock in 24 hours failover&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6904&quot;&gt;&lt;del&gt;LU-6904&lt;/del&gt;&lt;/a&gt; mdd: prepare linkea before declare&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: af46754e1e9c3e66928e13b2301890cfa94ac059&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="31033">LU-6831</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="31147">LU-6883</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxizj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>