<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:10:37 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-811] possible recursive locking detected on client</title>
                <link>https://jira.whamcloud.com/browse/LU-811</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While running a Lustre 2.1 client on a debug kernel, we got the following warning from the lock validator. I suspect this may be a false alarm, since we didn&apos;t deadlock and the cl_lockset comments suggest that holding multiple locks is unavoidable in some cases. Reporting it here just in case it is a real bug.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;=============================================
[ INFO: possible recursive locking detected ]
2.6.32-207.1chaos.ch5.x86_64.debug #1
---------------------------------------------
sh/5936 is trying to acquire lock:
 (EXT){+.+.+.}, at: [&amp;lt;ffffffffa05f18c0&amp;gt;] cl_lock_lockdep_acquire+0x0/0x50 [obdclass]

but task is already holding lock:
 (EXT){+.+.+.}, at: [&amp;lt;ffffffffa05f18c0&amp;gt;] cl_lock_lockdep_acquire+0x0/0x50 [obdclass]

other info that might help us debug this:
2 locks held by sh/5936:
 #0:  (&amp;amp;lli-&amp;gt;lli_trunc_sem){.+.+.+}, at: [&amp;lt;ffffffffa08ca118&amp;gt;]
ll_file_io_generic+0x2c8/0x580 [lustre]
 #1:  (EXT){+.+.+.}, at: [&amp;lt;ffffffffa05f18c0&amp;gt;] cl_lock_lockdep_acquire+0x0/0x50
[obdclass]

stack backtrace:
Pid: 5936, comm: sh Not tainted 2.6.32-207.1chaos.ch5.x86_64.debug #1
Call Trace:
 [&amp;lt;ffffffff810af570&amp;gt;] ? __lock_acquire+0x11c0/0x1570
 [&amp;lt;ffffffff81013753&amp;gt;] ? native_sched_clock+0x13/0x60
 [&amp;lt;ffffffff81012c29&amp;gt;] ? sched_clock+0x9/0x10
 [&amp;lt;ffffffff8109d37d&amp;gt;] ? sched_clock_cpu+0xcd/0x110
 [&amp;lt;ffffffff810af9c4&amp;gt;] ? lock_acquire+0xa4/0x120
 [&amp;lt;ffffffffa05f18c0&amp;gt;] ? cl_lock_lockdep_acquire+0x0/0x50 [obdclass]
 [&amp;lt;ffffffffa05f18fd&amp;gt;] ? cl_lock_lockdep_acquire+0x3d/0x50 [obdclass]
 [&amp;lt;ffffffffa05f18c0&amp;gt;] ? cl_lock_lockdep_acquire+0x0/0x50 [obdclass]
 [&amp;lt;ffffffffa05f68e9&amp;gt;] ? cl_lock_request+0x1e9/0x200 [obdclass]
 [&amp;lt;ffffffff810adc9d&amp;gt;] ? trace_hardirqs_on_caller+0x14d/0x190
 [&amp;lt;ffffffffa0918d40&amp;gt;] ? cl_glimpse_lock+0x180/0x390 [lustre]
 [&amp;lt;ffffffffa08df942&amp;gt;] ? ll_inode_size_unlock+0x52/0xf0 [lustre]
 [&amp;lt;ffffffff8151fabb&amp;gt;] ? _spin_unlock+0x2b/0x40
 [&amp;lt;ffffffffa091cda6&amp;gt;] ? ccc_prep_size+0x1c6/0x280 [lustre]
 [&amp;lt;ffffffff810adc9d&amp;gt;] ? trace_hardirqs_on_caller+0x14d/0x190
 [&amp;lt;ffffffffa091b691&amp;gt;] ? cl2ccc_io+0x21/0x80 [lustre]
 [&amp;lt;ffffffffa0921faf&amp;gt;] ? vvp_io_read_start+0xbf/0x3d0 [lustre]
 [&amp;lt;ffffffffa05f3525&amp;gt;] ? cl_wait+0xb5/0x290 [obdclass]
 [&amp;lt;ffffffffa05f6ec8&amp;gt;] ? cl_io_start+0x68/0x170 [obdclass]
 [&amp;lt;ffffffffa05fb930&amp;gt;] ? cl_io_loop+0x110/0x1c0 [obdclass]
 [&amp;lt;ffffffffa08ca217&amp;gt;] ? ll_file_io_generic+0x3c7/0x580 [lustre]
 [&amp;lt;ffffffffa04c9c22&amp;gt;] ? cfs_hash_rw_unlock+0x12/0x30 [libcfs]
 [&amp;lt;ffffffffa04c8754&amp;gt;] ? cfs_hash_dual_bd_unlock+0x34/0x60 [libcfs]
 [&amp;lt;ffffffffa05ea8f9&amp;gt;] ? cl_env_get+0x29/0x350 [obdclass]
 [&amp;lt;ffffffffa08cf37c&amp;gt;] ? ll_file_aio_read+0x13c/0x310 [lustre]
 [&amp;lt;ffffffffa05eaa6d&amp;gt;] ? cl_env_get+0x19d/0x350 [obdclass]
 [&amp;lt;ffffffff81042c54&amp;gt;] ? __do_page_fault+0x244/0x4e0
 [&amp;lt;ffffffffa08cf6c1&amp;gt;] ? ll_file_read+0x171/0x310 [lustre]
 [&amp;lt;ffffffff8109d37d&amp;gt;] ? sched_clock_cpu+0xcd/0x110
 [&amp;lt;ffffffff810aa27d&amp;gt;] ? trace_hardirqs_off+0xd/0x10
 [&amp;lt;ffffffff8109d4af&amp;gt;] ? cpu_clock+0x6f/0x80
 [&amp;lt;ffffffff81192875&amp;gt;] ? vfs_read+0xb5/0x1a0
 [&amp;lt;ffffffff811929b1&amp;gt;] ? sys_read+0x51/0x90
 [&amp;lt;ffffffff8100b0b2&amp;gt;] ? system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This is the test that was running at the time (while debugging a cgroup-related problem):&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt; CGROUP_DIR=$(lssubsys -m memory | cut -d&apos; &apos; -f2)
 P=$CGROUP_DIR/test

 move_current_to_cgroup() {
        echo &amp;gt; $1/tasks
 }

 clean_up_all() {
        move_current_to_cgroup $CGROUP_DIR
        rm ./tmpfile
        rmdir $CGROUP_DIR/test/A
        rmdir $CGROUP_DIR/test
        exit 1
 }

 trap clean_up_all INT

 mkdir $P
 echo &amp;gt; $P/tasks

 while sleep 1; do
        date
        T=$P/A       
        mkdir $T
        move_current_to_cgroup $T
        echo 300M &amp;gt; $T/memory.limit_in_bytes
        cat /proc/self/cgroup
        dd if=/dev/zero of=./tmpfile bs=4096 count=100000
        move_current_to_cgroup $P
        cat /proc/self/cgroup
        echo 0 &amp;gt; $T/memory.force_empty
        rmdir $T
        rm ./tmpfile
 done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>RHEL 6.2 &lt;br/&gt;
kernel 2.6.32-207.1chaos.ch5.x86_64.debug</environment>
        <key id="12296">LU-811</key>
            <summary>possible recursive locking detected on client</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="jay">Jinshan Xiong</assignee>
                                    <reporter username="nedbass">Ned Bass</reporter>
                        <labels>
                    </labels>
                <created>Tue, 1 Nov 2011 14:50:02 +0000</created>
                <updated>Mon, 4 Jun 2012 02:46:58 +0000</updated>
                            <resolved>Mon, 4 Jun 2012 02:46:58 +0000</resolved>
                                    <version>Lustre 2.1.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="22265" author="jay" created="Tue, 1 Nov 2011 15:26:59 +0000"  >&lt;p&gt;Can you please apply the patch set at &lt;a href=&quot;http://review.whamcloud.com/#change,1281,patchset=3&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,1281,patchset=3&lt;/a&gt;, and try again. Thanks.&lt;/p&gt;</comment>
                            <comment id="22296" author="nedbass" created="Wed, 2 Nov 2011 15:51:31 +0000"  >&lt;p&gt;Hi Jinshan,&lt;/p&gt;

&lt;p&gt;I&apos;m testing with the patch set now. So far, I haven&apos;t been able to reproduce the exact warning shown above, with or without the patch. However, I&apos;ve seen the following similar warning at mount time, again both with and without the patch. (I&apos;ve also encountered a circular locking warning that should probably be tracked in a separate issue.)&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;=============================================
[ INFO: possible recursive locking detected ]
2.6.32-207.1chaos.ch5.x86_64.debug #1
---------------------------------------------
ll_cfg_requeue/3038 is trying to acquire lock:
 (&amp;amp;cli-&amp;gt;cl_sem){.+.+.+}, at: [&amp;lt;ffffffffa068120e&amp;gt;] sptlrpc_conf_client_adapt+0x6e/0x200 [ptlrpc]

but task is already holding lock:
 (&amp;amp;cli-&amp;gt;cl_sem){.+.+.+}, at: [&amp;lt;ffffffffa08750d9&amp;gt;] do_requeue+0x39/0x190 [mgc]

other info that might help us debug this:
2 locks held by ll_cfg_requeue/3038:
 #0:  (&amp;amp;cli-&amp;gt;cl_sem){.+.+.+}, at: [&amp;lt;ffffffffa08750d9&amp;gt;] do_requeue+0x39/0x190 [mgc]
 #1:  (&amp;amp;cld-&amp;gt;cld_lock){+.+.+.}, at: [&amp;lt;ffffffffa08734bd&amp;gt;] mgc_process_log+0x5d/0x1430 [mgc]

stack backtrace:
Pid: 3038, comm: ll_cfg_requeue Tainted: G        W  ----------------   2.6.32-207.1chaos.ch5.x86_64.debug #1
Call Trace:
 [&amp;lt;ffffffff810af570&amp;gt;] ? __lock_acquire+0x11c0/0x1570
 [&amp;lt;ffffffff81012c29&amp;gt;] ? sched_clock+0x9/0x10
 [&amp;lt;ffffffff8109d245&amp;gt;] ? sched_clock_local+0x25/0x90
 [&amp;lt;ffffffff8109d368&amp;gt;] ? sched_clock_cpu+0xb8/0x110
 [&amp;lt;ffffffff810af9c4&amp;gt;] ? lock_acquire+0xa4/0x120
 [&amp;lt;ffffffffa068120e&amp;gt;] ? sptlrpc_conf_client_adapt+0x6e/0x200 [ptlrpc]
 [&amp;lt;ffffffff81038458&amp;gt;] ? pvclock_clocksource_read+0x58/0xd0
 [&amp;lt;ffffffff8151e4c1&amp;gt;] ? down_read+0x51/0xa0
 [&amp;lt;ffffffffa068120e&amp;gt;] ? sptlrpc_conf_client_adapt+0x6e/0x200 [ptlrpc]
 [&amp;lt;ffffffff81012c29&amp;gt;] ? sched_clock+0x9/0x10
 [&amp;lt;ffffffffa068120e&amp;gt;] ? sptlrpc_conf_client_adapt+0x6e/0x200 [ptlrpc]
 [&amp;lt;ffffffff8109d368&amp;gt;] ? sched_clock_cpu+0xb8/0x110
 [&amp;lt;ffffffffa07ab7cc&amp;gt;] ? mdc_set_info_async+0x67c/0x9d0 [mdc]
 [&amp;lt;ffffffff810ad39d&amp;gt;] ? lock_release_holdtime+0x3d/0x190
 [&amp;lt;ffffffffa0523272&amp;gt;] ? class_notify_sptlrpc_conf+0x212/0x540 [obdclass]
 [&amp;lt;ffffffffa08740f7&amp;gt;] ? mgc_process_log+0xc97/0x1430 [mgc]
 [&amp;lt;ffffffffa086fae0&amp;gt;] ? mgc_blocking_ast+0x0/0x4b0 [mgc]
 [&amp;lt;ffffffffa0620e20&amp;gt;] ? ldlm_completion_ast+0x0/0x740 [ptlrpc]
 [&amp;lt;ffffffffa0875106&amp;gt;] ? do_requeue+0x66/0x190 [mgc]
 [&amp;lt;ffffffffa0875438&amp;gt;] ? mgc_requeue_thread+0x208/0x640 [mgc]
 [&amp;lt;ffffffff810adc9d&amp;gt;] ? trace_hardirqs_on_caller+0x14d/0x190
 [&amp;lt;ffffffff81062370&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa0875230&amp;gt;] ? mgc_requeue_thread+0x0/0x640 [mgc]
 [&amp;lt;ffffffff810adc9d&amp;gt;] ? trace_hardirqs_on_caller+0x14d/0x190
 [&amp;lt;ffffffffa0875230&amp;gt;] ? mgc_requeue_thread+0x0/0x640 [mgc]
 [&amp;lt;ffffffff8100c20a&amp;gt;] ? child_rip+0xa/0x20
 [&amp;lt;ffffffff8100bb50&amp;gt;] ? restore_args+0x0/0x30
 [&amp;lt;ffffffffa0875230&amp;gt;] ? mgc_requeue_thread+0x0/0x640 [mgc]
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="22649" author="jay" created="Mon, 7 Nov 2011 13:20:59 +0000"  >&lt;p&gt;Hi Ned,&lt;/p&gt;

&lt;p&gt;It seems all right, because they are acquiring cl_sem for different obd devices: the first cl_sem is for the MGC and the second one is for the MDC. This may not be ideal, but let&apos;s ignore it unless we can find another path that grabs the cl_sems in the reverse order.&lt;/p&gt;</comment>
                            <comment id="39894" author="pjones" created="Mon, 4 Jun 2012 02:46:58 +0000"  >&lt;p&gt;Does not seem to be a bug. Will reopen if this occurs again and causes a problem in production&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="11545">LU-619</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                    <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvhsf:</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6531</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                    <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>