<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:15:41 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-15127] import invalidation vs writeback deadlock</title>
                <link>https://jira.whamcloud.com/browse/LU-15127</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;racer hits this deadlock a few times a day:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
schedule,osc_extent_wait,osc_cache_wait_range,osc_cache_writeback_range,osc_io_fsync_start,cl_io_start,lov_io_call,cl_io_start,cl_io_loop,cl_sync_file_range,ll_delete_inode,evict,__dentry_kill,dentry_kill,dput,ll_dirty_page_discard_warn,vvp_page_completion_write,cl_page_completion,osc_ap_completion,osc_extent_finish,brw_interpret,ptlrpc_check_set,ptlrpcd
	PIDs(1): &lt;span class=&quot;code-quote&quot;&gt;&quot;ptlrpcd_00_00&quot;&lt;/span&gt;:4889 

schedule,osc_extent_wait,osc_cache_wait_range,osc_cache_writeback_range,osc_ldlm_blocking_ast,ldlm_cancel_callback,ldlm_cli_cancel_local,ldlm_cli_cancel,osc_ldlm_blocking_ast,ldlm_handle_bl_callback,ldlm_bl_thread_main
	PIDs(1): &lt;span class=&quot;code-quote&quot;&gt;&quot;ldlm_bl_02&quot;&lt;/span&gt;:7759 

schedule,ptlrpc_invalidate_import,ptlrpc_invalidate_import_thread
	PIDs(1): &lt;span class=&quot;code-quote&quot;&gt;&quot;ll_imp_inval&quot;&lt;/span&gt;:293752 

schedule,ptlrpc_invalidate_import,ptlrpc_set_import_active,osc_iocontrol,lov_iocontrol,ll_umount_begin,ksys_umount,__x64_sys_umount
	PIDs(1): &lt;span class=&quot;code-quote&quot;&gt;&quot;umount&quot;&lt;/span&gt;:449648 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="66730">LU-15127</key>
            <summary>import invalidation vs writeback deadlock</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="paf0186">Patrick Farrell</assignee>
                                    <reporter username="bzzz">Alex Zhuravlev</reporter>
                        <labels>
                    </labels>
                <created>Tue, 19 Oct 2021 06:10:28 +0000</created>
                <updated>Mon, 14 Aug 2023 16:26:33 +0000</updated>
                                            <version>Upstream</version>
                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="318071" author="paf0186" created="Fri, 12 Nov 2021 12:31:11 +0000"  >&lt;p&gt;Ah, I think this is the classic dirty page discard warn issue&#8230; it takes a reference to print a debug message and can end up deadlocked because of it.&lt;/p&gt;</comment>
                            <comment id="318072" author="paf0186" created="Fri, 12 Nov 2021 12:32:24 +0000"  >&lt;p&gt;I think I can figure out a fix (unless you&#8217;re already working on it).&lt;/p&gt;</comment>
                            <comment id="318078" author="bzzz" created="Fri, 12 Nov 2021 13:01:04 +0000"  >&lt;p&gt;please, go ahead, I&apos;m busy with other stuff.&lt;/p&gt;</comment>
                            <comment id="318100" author="gerrit" created="Fri, 12 Nov 2021 16:34:54 +0000"  >&lt;p&gt;&quot;Patrick Farrell &amp;lt;pfarrell@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/45550&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45550&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15127&quot; title=&quot;import invalidation vs writeback deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15127&quot;&gt;LU-15127&lt;/a&gt; llite: Remove path from discard_warn&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: d04a1929b3adb776173b02e0f6b82d396046dd14&lt;/p&gt;</comment>
                            <comment id="319114" author="bzzz" created="Wed, 24 Nov 2021 16:22:46 +0000"  >&lt;p&gt;the invalidation path still deadlocks, but much less frequently. I guess this is a slightly different issue?&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
schedule,ldlm_completion_ast,ldlm_lock_match_with_skip,osc_enqueue_base,osc_lock_enqueue,cl_lock_enqueue,lov_lock_enqueue,cl_lock_enqueue,cl_lock_request,cl_io_lock,cl_io_loop,cl_setattr_ost,ll_setattr_raw,do_truncate,path_openat,do_filp_open,do_sys_open
	PIDs(1): &lt;span class=&quot;code-quote&quot;&gt;&quot;cp&quot;&lt;/span&gt;:367053 

schedule,osc_object_invalidate,osc_ldlm_resource_invalidate,cfs_hash_for_each_relax,cfs_hash_for_each_nolock,osc_import_event,ptlrpc_invalidate_import,ptlrpc_invalidate_import_thread
	PIDs(1): &lt;span class=&quot;code-quote&quot;&gt;&quot;ll_imp_inval&quot;&lt;/span&gt;:384198 

schedule,osc_object_invalidate,osc_ldlm_resource_invalidate,cfs_hash_for_each_relax,cfs_hash_for_each_nolock,osc_import_event,ptlrpc_invalidate_import,ptlrpc_set_import_active,osc_iocontrol,lov_iocontrol,ll_umount_begin,ksys_umount,__x64_sys_umount
	PIDs(1): &lt;span class=&quot;code-quote&quot;&gt;&quot;umount&quot;&lt;/span&gt;:384405 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
</comment>
                            <comment id="319116" author="paf0186" created="Wed, 24 Nov 2021 16:44:22 +0000"  >&lt;p&gt;Hmm...&#160; So osc_object_invalidate is probably waiting for nr_ios to be zero, and that&apos;s incremented in cl_io_iter_init (osc_io_iter_init) before the lock request is made.&lt;/p&gt;

&lt;p&gt;So I guess it&apos;s waiting for that completion ast, which is waiting for the lock to be granted or cancelled.&lt;/p&gt;

&lt;p&gt;So somehow that lock isn&apos;t getting granted or cancelled I guess?&lt;/p&gt;

&lt;p&gt;I&apos;m not quite sure how pending lock requests are cancelled when an import is invalidated.&lt;/p&gt;</comment>
                            <comment id="319117" author="paf0186" created="Wed, 24 Nov 2021 16:47:06 +0000"  >&lt;p&gt;Are you able to dump the LDLM namespaces for that hang?&lt;/p&gt;</comment>
                            <comment id="319122" author="paf0186" created="Wed, 24 Nov 2021 17:00:32 +0000"  >&lt;p&gt;So looking at osc_import_event for INVALIDATE, we call:&lt;/p&gt;

&lt;p&gt;ldlm_namespace_cleanup&lt;br/&gt;
osc_ldlm_resource_invalidate (which calls osc_object_invalidate)&lt;br/&gt;
Then call&lt;br/&gt;
ldlm_namespace_cleanup&lt;/p&gt;

&lt;p&gt;again&lt;/p&gt;

&lt;p&gt;ldlm_namespace_cleanup calls ldlm_resource_clean, then ldlm_resource_complain.&lt;/p&gt;

&lt;p&gt;ldlm_resource_complain shows that we can sometimes have locks left after ldlm_resource_clean&lt;/p&gt;

&lt;p&gt;So the thread doing &apos;cp&apos; is trying to match an existing lock.&lt;/p&gt;

&lt;p&gt;So the lock survives the call to ldlm_resource_clean, then the osc oo_nr_ios is &amp;gt; 0, so we cannot invalidate the OSC object, so we do not try to clean up the lock again.&lt;/p&gt;

&lt;p&gt;I don&apos;t know why a lock would survive ldlm_resource_clean, but that seems like the issue.&lt;/p&gt;

&lt;p&gt;Maybe we need to call ldlm_resource_clean from osc_object_invalidate if oo_nr_ios is &amp;gt; 0 ?&lt;/p&gt;</comment>
                            <comment id="319129" author="gerrit" created="Wed, 24 Nov 2021 17:42:34 +0000"  >&lt;p&gt;&quot;Patrick Farrell &amp;lt;pfarrell@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/45658&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45658&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15127&quot; title=&quot;import invalidation vs writeback deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15127&quot;&gt;LU-15127&lt;/a&gt; osc: Resource cleanup in osc invalidate&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 7bad59f258269dd358d066bd023defeaec955f6d&lt;/p&gt;</comment>
                            <comment id="319130" author="paf0186" created="Wed, 24 Nov 2021 17:42:58 +0000"  >&lt;p&gt;There are a few fairly heroic guesses in that patch, but I think it&apos;s probably right...&#160; Alex, if you can try it in your test rig...&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="56293">LU-12522</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="67506">LU-15340</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i027qf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>