<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:58:46 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6271] (osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:</title>
                <link>https://jira.whamcloud.com/browse/LU-6271</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While performing failover testing:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;LustreError: 167-0: lustre-OST0000-osc-ffff88083b339000: This client was evicted by lustre-OST0000; in progress operations using &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; service will fail.
Feb 23 09:22:48 iwc260 kernel: LustreError: 51585:0:(ldlm_resource.c:777:ldlm_resource_complain()) lustre-OST0000-osc-ffff88083b339000: namespace resource [0x2d097bb:0x0:0x0].0 (ffff88106a992e40) refcount nonzero (1) after lock cleanup; forcing cleanup.
Feb 23 09:22:48 iwc260 kernel: LustreError: 51585:0:(ldlm_resource.c:1374:ldlm_resource_dump()) --- Resource: [0x2d097bb:0x0:0x0].0 (ffff88106a992e40) refcount = 3
Feb 23 09:22:48 iwc260 kernel: LustreError: 51585:0:(ldlm_resource.c:1377:ldlm_resource_dump()) Granted locks (in reverse order):
Feb 23 09:22:48 iwc260 kernel: LustreError: 51585:0:(ldlm_resource.c:1380:ldlm_resource_dump()) ### ### ns: lustre-OST0000-osc-ffff88083b339000 lock: ffff88106a60d540/0xa1f402a8812988f4 lrc: 3/0,1 mode: PW/PW res: [0x2d097bb:0x0:0x0].0 rrc: 3 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;8191) flags: 0x126400020000 nid: local remote: 0x72a9f4c80e66e07b expref: -99 pid: 51570 timeout: 0 lvb_type: 1
Feb 23 09:22:48 iwc260 kernel: Lustre: lustre-OST0000-osc-ffff88083b339000: Connection restored to lustre-OST0000 (at 192.168.120.13@o2ib)
Feb 23 09:22:49 iwc260 kernel: LustreError: 90252:0:(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:
Feb 23 09:22:49 iwc260 kernel: LustreError: 90252:0:(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:
Feb 23 09:22:49 iwc260 kernel: LustreError: 90252:0:(osc_cache.c:3150:discard_cb()) LBUG
Feb 23 09:22:49 iwc260 kernel: LustreError: 90252:0:(osc_cache.c:3150:discard_cb()) LBUG
Feb 23 09:22:49 iwc260 kernel: Pid: 90252, comm: ldlm_bl_36
Feb 23 09:22:49 iwc260 kernel:
Feb 23 09:22:49 iwc260 kernel: Call Trace:
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0435895&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0435e97&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0b2ba56&amp;gt;] discard_cb+0x156/0x190 [osc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0b2bdcc&amp;gt;] osc_page_gang_lookup+0x1ac/0x330 [osc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0b2b900&amp;gt;] ? discard_cb+0x0/0x190 [osc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0b2c094&amp;gt;] osc_lock_discard_pages+0x144/0x240 [osc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa04461c1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0b2b900&amp;gt;] ? discard_cb+0x0/0x190 [osc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0b2298b&amp;gt;] osc_lock_flush+0x8b/0x260 [osc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0b22e08&amp;gt;] osc_ldlm_blocking_ast+0x2a8/0x3c0 [osc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0761a6c&amp;gt;] ldlm_cancel_callback+0x6c/0x170 [ptlrpc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa077450a&amp;gt;] ldlm_cli_cancel_local+0x8a/0x470 [ptlrpc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0779120&amp;gt;] ldlm_cli_cancel+0x60/0x360 [ptlrpc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa0b22c3b&amp;gt;] osc_ldlm_blocking_ast+0xdb/0x3c0 [osc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa04461c1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa077cb60&amp;gt;] ldlm_handle_bl_callback+0x130/0x400 [ptlrpc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa077d0c1&amp;gt;] ldlm_bl_thread_main+0x291/0x3f0 [ptlrpc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffffa077ce30&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3f0 [ptlrpc]
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
Feb 23 09:22:49 iwc260 kernel: [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Hyperon</environment>
        <key id="28805">LU-6271</key>
            <summary>(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="jay">Jinshan Xiong</assignee>
                                    <reporter username="cliffw">Cliff White</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Mon, 23 Feb 2015 17:48:52 +0000</created>
                <updated>Sun, 28 Jun 2020 17:16:31 +0000</updated>
                            <resolved>Sun, 25 Oct 2015 12:45:50 +0000</resolved>
                                    <version>Lustre 2.7.0</version>
                                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>25</watches>
                                                                            <comments>
                            <comment id="107664" author="jlevi" created="Mon, 23 Feb 2015 18:13:28 +0000"  >&lt;p&gt;John,&lt;br/&gt;
Could you please have a look and comment on this one?&lt;br/&gt;
Thank you!&lt;/p&gt;</comment>
                            <comment id="107674" author="jhammond" created="Mon, 23 Feb 2015 18:44:18 +0000"  >&lt;p&gt;Hi Cliff, could you please include the exact version you were testing along with a description of your failover testing?&lt;/p&gt;</comment>
                            <comment id="107803" author="cliffw" created="Tue, 24 Feb 2015 18:29:24 +0000"  >&lt;p&gt;We are running 2.6.94 lustre-master build 2856. &lt;br/&gt;
Failover testing consists of &lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;power an OSS node off.&lt;/li&gt;
	&lt;li&gt;mount the devices from the dead OSS on the failover node.&lt;/li&gt;
	&lt;li&gt;sleep for awhile&lt;/li&gt;
	&lt;li&gt;power up the dead OSS&lt;/li&gt;
	&lt;li&gt;failback the devices.&lt;br/&gt;
This is done &apos;manually&apos; via a script, as we do not have HA software installed. &lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="107820" author="jhammond" created="Tue, 24 Feb 2015 19:25:18 +0000"  >&lt;p&gt;What were the clients running?&lt;/p&gt;</comment>
                            <comment id="107822" author="cliffw" created="Tue, 24 Feb 2015 19:47:36 +0000"  >&lt;p&gt;They were running the client bits from the same build&lt;/p&gt;</comment>
                            <comment id="107824" author="jhammond" created="Tue, 24 Feb 2015 19:57:42 +0000"  >&lt;p&gt;What application(s) were they running?&lt;/p&gt;</comment>
                            <comment id="107828" author="cliffw" created="Tue, 24 Feb 2015 20:14:05 +0000"  >&lt;p&gt;They were running the SWL workload which is a mix of standard tests, including IOR, mutest, simul and others. &lt;/p&gt;</comment>
                            <comment id="108341" author="jay" created="Sun, 1 Mar 2015 08:37:33 +0000"  >&lt;p&gt;Does the workload include Direct IO and how often can it be reproduced?&lt;/p&gt;</comment>
                            <comment id="108342" author="jay" created="Sun, 1 Mar 2015 08:47:00 +0000"  >&lt;p&gt;I took a look at the log. From the log, it looks like the write back was not triggered at all. The client was discarding the pages starting from index 4608, and the assertion was hit when it tried to discard page 5120. The log is too short and some key information is missing because I want to know the lock range, type, and what pages were actually written back.&lt;/p&gt;

&lt;p&gt;Cliff, can you rerun the test case, with a little bit more debug buffer size, say 100M, and enable cache on the client and then reproduce this issue? As follows:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;lctl set_param debug_size=100M
lctl set_param debug=+cache
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="108403" author="jay" created="Mon, 2 Mar 2015 14:57:13 +0000"  >&lt;p&gt;BTW, do the test programs use group lock?&lt;/p&gt;</comment>
                            <comment id="108415" author="cliffw" created="Mon, 2 Mar 2015 15:54:58 +0000"  >&lt;p&gt;Our normal default is &lt;br/&gt;
lctl set_param panic_on_lbug=0;lctl set_param debug_mb=512M&lt;/p&gt;

&lt;p&gt;So, no idea why the info you want is not in that log. I am past that build in the test cycle, I will repeat the test when I get the next build test cycle.&lt;/p&gt;</comment>
                            <comment id="116181" author="lidongyang" created="Fri, 22 May 2015 06:16:11 +0000"  >&lt;p&gt;We encountered the same problem at NCI, when the OSTs evicted the clients, many clients LBUGed with the same context above. Now we can reproduce this with a simple setup:&lt;br/&gt;
on the client, have a script monitor and keep the number of write processes, e.g. IOR. on the OSTs, have another script does the eviction constantly with an interval, e.g. 10 seconds.&lt;/p&gt;

&lt;p&gt;The client always LBUG with the ldlm_resource_dump message. If message appears then it&apos;s close. sometimes it takes resource dump messages for a LBUG.&lt;/p&gt;</comment>
                            <comment id="116182" author="gerrit" created="Fri, 22 May 2015 06:16:47 +0000"  >&lt;p&gt;Li Dongyang (dongyang.li@anu.edu.au) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/14915&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/14915&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; ldlm: cleanup locks before activating import&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: aeff14823489ef3c53982d440a1df2c583c802d6&lt;/p&gt;</comment>
                            <comment id="116577" author="jay" created="Wed, 27 May 2015 18:19:39 +0000"  >&lt;p&gt;The lock being canceled belongs to the previous instance of import. The dirty pages are actually protected by a new lock. They shouldn&apos;t be written back at this time at all. I&apos;m working on a patch&lt;/p&gt;</comment>
                            <comment id="116625" author="lidongyang" created="Thu, 28 May 2015 00:30:54 +0000"  >&lt;p&gt;Hi Jinshan,&lt;br/&gt;
IMHO the client should clean up everything in cleanup_resource() before reconnecting. Doing this can also avoid the annoying ldlm_resource_complain messages. On our login nodes where users are doing many IOs to their files, once it got evicted from the OST, the cpu will spin for a long time dumping the locks and the box is not responsive at all.&lt;/p&gt;</comment>
                            <comment id="116647" author="gerrit" created="Thu, 28 May 2015 07:36:20 +0000"  >&lt;p&gt;Jinshan Xiong (jinshan.xiong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/14989&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/14989&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; osc: handle osc eviction correctly&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 8c01bf6f3c689f40f5863b30478c1f61947e316f&lt;/p&gt;</comment>
                            <comment id="116689" author="jay" created="Thu, 28 May 2015 15:14:02 +0000"  >&lt;p&gt;Hi Dongyang,&lt;/p&gt;

&lt;p&gt;Will you try the patch 14989 and see how it goes?&lt;/p&gt;

&lt;p&gt;Jinshan&lt;/p&gt;</comment>
                            <comment id="116787" author="lidongyang" created="Thu, 28 May 2015 23:44:12 +0000"  >&lt;p&gt;Hi Jinshan,&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&amp;lt;3&amp;gt;LustreError: 31958:0:(ldlm_resource.c:776:ldlm_resource_complain()) testfs-OST0000-osc-ffff880139269c00: namespace resource [0x22:0x0:0x0].0 (ffff880101507840) refcount nonzero (1) after lock cleanup; forcing cleanup.
&amp;lt;3&amp;gt;LustreError: 31958:0:(ldlm_resource.c:1369:ldlm_resource_dump()) --- Resource: [0x22:0x0:0x0].0 (ffff880101507840) refcount = 2
&amp;lt;3&amp;gt;LustreError: 31958:0:(ldlm_resource.c:1372:ldlm_resource_dump()) Granted locks (in reverse order):
&amp;lt;3&amp;gt;LustreError: 31958:0:(ldlm_resource.c:1375:ldlm_resource_dump()) ### ### ns: testfs-OST0000-osc-ffff880139269c00 lock: ffff8800bbb524c0/0x51b0732653777408 lrc: 4/0,1 mode: PW/PW res: [0x22:0x0:0x0].0 rrc: 2 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;1048575) flags: 0x126400000000 nid: local remote: 0xdc8209701f2ad6da expref: -99 pid: 31677 timeout: 0 lvb_type: 1
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_cache.c:3155:discard_cb()) page@ffff88010c35d400[3 ffff88003780db38 1 0 1 ffff8800be860220 (null)]
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_cache.c:3155:discard_cb()) vvp-page@ffff88010c35d468(0:0) vm@ffffea00044f7068 40000000000879 3:0 ffff88010c35d400 5888 lru
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_cache.c:3155:discard_cb()) lov-page@ffff88010c35d4a8, raid0
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_cache.c:3155:discard_cb()) osc-page@ffff88010c35d510 5888: 1&amp;lt; 0x845fed 258 0 + - &amp;gt; 2&amp;lt; 24117248 0 4096 0x0 0x520 | (null) ffff88013c46a568 ffff880037882d90 &amp;gt; 3&amp;lt; + ffff88010bcd7500 1 0 0 &amp;gt; 4&amp;lt; 0 1 8 18446744073665499136 - | + - + - &amp;gt; 5&amp;lt; + - + - | 0 - | 2560 - -&amp;gt;
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_cache.c:3155:discard_cb()) end page@ffff88010c35d400
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_cache.c:3155:discard_cb()) discard dirty page?
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_cache.c:2466:osc_teardown_async_page()) extent ffff880112ed9288@{[5888 -&amp;gt; 6143/6143], [2|0|+|locking|wi|ffff880037882d90], [1048576|256|+|-|ffff8801386b9880|256|ffff88013c193500]} trunc at 5888.
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_cache.c:2466:osc_teardown_async_page()) ### extent: ffff880112ed9288
&amp;lt;3&amp;gt; ns: testfs-OST0000-osc-ffff880139269c00 lock: ffff8801386b9880/0x51b07326537775eb lrc: 35/0,3 mode: PW/PW res: [0x22:0x0:0x0].0 rrc: 2 type: EXT [0-&amp;gt;18446744073709551615] (req 23068672-&amp;gt;24117247) flags: 0x20000000000 nid: local remote: 0xdc8209701f2ad6fd expref: -99 pid: 31718 timeout: 0 lvb_type: 1
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_page.c:313:osc_page_delete()) page@ffff88010c35d400[3 ffff88003780db38 4 0 1 (null) (null)]
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_page.c:313:osc_page_delete()) vvp-page@ffff88010c35d468(0:0) vm@ffffea00044f7068 40000000000879 3:0 ffff88010c35d400 5888 lru
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_page.c:313:osc_page_delete()) lov-page@ffff88010c35d4a8, raid0
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_page.c:313:osc_page_delete()) osc-page@ffff88010c35d510 5888: 1&amp;lt; 0x845fed 258 0 + - &amp;gt; 2&amp;lt; 24117248 0 4096 0x0 0x520 | (null) ffff88013c46a568 ffff880037882d90 &amp;gt; 3&amp;lt; + ffff88010bcd7500 0 0 0 &amp;gt; 4&amp;lt; 0 1 8 18446744073644531712 + | + - + - &amp;gt; 5&amp;lt; + - + - | 0 - | 7680 - -&amp;gt;
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_page.c:313:osc_page_delete()) end page@ffff88010c35d400
&amp;lt;3&amp;gt;
&amp;lt;3&amp;gt;LustreError: 28596:0:(osc_page.c:313:osc_page_delete()) Trying to teardown failed: -16
&amp;lt;0&amp;gt;LustreError: 28596:0:(osc_page.c:314:osc_page_delete()) ASSERTION( 0 ) failed: 
&amp;lt;0&amp;gt;LustreError: 28596:0:(osc_page.c:314:osc_page_delete()) LBUG
&amp;lt;4&amp;gt;Pid: 28596, comm: ldlm_bl_00
&amp;lt;4&amp;gt;
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0251875&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0251e77&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa09386a6&amp;gt;] osc_page_delete+0x446/0x4e0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03b05bd&amp;gt;] cl_page_delete0+0x7d/0x210 [obdclass]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03b078d&amp;gt;] cl_page_delete+0x3d/0x110 [obdclass]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa085f63d&amp;gt;] ll_invalidatepage+0x8d/0x160 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa086ee65&amp;gt;] vvp_page_discard+0xc5/0x160 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03aef18&amp;gt;] cl_page_invoid+0x68/0x160 [obdclass]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03af023&amp;gt;] cl_page_discard+0x13/0x20 [obdclass]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0943cb8&amp;gt;] discard_cb+0x88/0x1e0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa094414c&amp;gt;] osc_page_gang_lookup+0x1ac/0x330 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0943c30&amp;gt;] ? discard_cb+0x0/0x1e0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0943c30&amp;gt;] ? discard_cb+0x0/0x1e0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0952108&amp;gt;] osc_cache_flush+0x178/0x410 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa093aa68&amp;gt;] osc_ldlm_blocking_ast+0x2a8/0x3c0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa053a4cc&amp;gt;] ldlm_cancel_callback+0x6c/0x170 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa054c35a&amp;gt;] ldlm_cli_cancel_local+0x8a/0x470 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0550f80&amp;gt;] ldlm_cli_cancel+0x60/0x360 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa093a89b&amp;gt;] osc_ldlm_blocking_ast+0xdb/0x3c0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa05549c0&amp;gt;] ldlm_handle_bl_callback+0x130/0x400 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0554f21&amp;gt;] ldlm_bl_thread_main+0x291/0x3f0 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81064b90&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0554c90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3f0 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109e66e&amp;gt;] kthread+0x9e/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109e5d0&amp;gt;] ? kthread+0x0/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;4&amp;gt;
&amp;lt;0&amp;gt;Kernel panic - not syncing: LBUG
&amp;lt;4&amp;gt;Pid: 28596, comm: ldlm_bl_00 Not tainted 2.6.32-504.12.2.el6.x86_64 #1
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8152933c&amp;gt;] ? panic+0xa7/0x16f
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0251ecb&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa09386a6&amp;gt;] ? osc_page_delete+0x446/0x4e0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03b05bd&amp;gt;] ? cl_page_delete0+0x7d/0x210 [obdclass]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03b078d&amp;gt;] ? cl_page_delete+0x3d/0x110 [obdclass]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa085f63d&amp;gt;] ? ll_invalidatepage+0x8d/0x160 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa086ee65&amp;gt;] ? vvp_page_discard+0xc5/0x160 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03aef18&amp;gt;] ? cl_page_invoid+0x68/0x160 [obdclass]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03af023&amp;gt;] ? cl_page_discard+0x13/0x20 [obdclass]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0943cb8&amp;gt;] ? discard_cb+0x88/0x1e0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa094414c&amp;gt;] ? osc_page_gang_lookup+0x1ac/0x330 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0943c30&amp;gt;] ? discard_cb+0x0/0x1e0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0943c30&amp;gt;] ? discard_cb+0x0/0x1e0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0952108&amp;gt;] ? osc_cache_flush+0x178/0x410 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa093aa68&amp;gt;] ? osc_ldlm_blocking_ast+0x2a8/0x3c0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa053a4cc&amp;gt;] ? ldlm_cancel_callback+0x6c/0x170 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa054c35a&amp;gt;] ? ldlm_cli_cancel_local+0x8a/0x470 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0550f80&amp;gt;] ? ldlm_cli_cancel+0x60/0x360 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa093a89b&amp;gt;] ? osc_ldlm_blocking_ast+0xdb/0x3c0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa05549c0&amp;gt;] ? ldlm_handle_bl_callback+0x130/0x400 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0554f21&amp;gt;] ? ldlm_bl_thread_main+0x291/0x3f0 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81064b90&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0554c90&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3f0 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109e66e&amp;gt;] ? kthread+0x9e/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c20a&amp;gt;] ? child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109e5d0&amp;gt;] ? kthread+0x0/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Could you have a look at patch 14915 please? We have been testing it for a long time before submitting it.&lt;/p&gt;</comment>
                            <comment id="121652" author="apercher" created="Mon, 20 Jul 2015 09:10:11 +0000"  >&lt;p&gt;HI,&lt;br/&gt;
  At Cea with lustre 2.7, we meet the same LBUG &lt;br/&gt;
 (osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&lt;br/&gt;
  In my case the ldlm thread has the same stack and I have another user thread who are currently writing on the&lt;br/&gt;
  concerning file  and the stack was :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;      cl_sync_io_wait()
      cl_io_submit_sync()
      ll_page_sync_io()
      ll_write_begin()
      generic_file_buffered_write()
      __generic_file_aio_write()
      vvp_io_write_start()
      cl_io_start()
      cl_io_loop()
      ll_file_io_generic()
      ll_file_aio_write()
      ll_file_write()
      vfs_write()
      sys_write()
      system_call_fastpath()
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;  This thread do a write in 1024 (0x400) pages, and the page concerning by the LBug is a page&lt;br/&gt;
  who are in this write() &lt;br/&gt;
  The writing thread waiting the sync of the last  page 1024 (0x400)&lt;br/&gt;
  If I understand well patch 14915 need some additional work, could you confirm ?&lt;br/&gt;
Antoine&lt;/p&gt;</comment>
                            <comment id="121702" author="jay" created="Mon, 20 Jul 2015 17:00:29 +0000"  >&lt;p&gt;Hi Antoine,&lt;/p&gt;

&lt;p&gt;Did you notice that this OSC had ever been evicted from your occurrence?&lt;/p&gt;</comment>
                            <comment id="121834" author="bfaccini" created="Tue, 21 Jul 2015 16:27:17 +0000"  >&lt;p&gt;Hello Jinshan,&lt;br/&gt;
Talking about this problem with Antoine today, I think that the answer is yes, but it need to be double-checked by Antoine on-site.&lt;/p&gt;</comment>
                            <comment id="121901" author="apercher" created="Wed, 22 Jul 2015 06:24:19 +0000"  >&lt;p&gt;Hi,&lt;br/&gt;
 Yes, for this client, the 3 osts concerning by the current I/Os has evicted and reconnected just before the assert. And for me that is a &quot;short eviction&quot; due to huge I/O between these OSTs and this client.&lt;br/&gt;
Antoine    &lt;/p&gt;</comment>
                            <comment id="122389" author="lidongyang" created="Tue, 28 Jul 2015 10:39:28 +0000"  >&lt;p&gt;Here is a demo code to show how to reproduce the problem with grouplock:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#include &amp;lt;sys/types.h&amp;gt;
#include &amp;lt;sys/stat.h&amp;gt;
#include &amp;lt;fcntl.h&amp;gt;
#include &amp;lt;sys/ioctl.h&amp;gt;
#include &amp;lt;stdlib.h&amp;gt;
#include &amp;lt;string.h&amp;gt;
#include &amp;lt;unistd.h&amp;gt;
#include &amp;lt;stdio.h&amp;gt;

#include &amp;lt;lustre/lustre_user.h&amp;gt;

&lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; main(&lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; argc, &lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt; *argv[])
{
        &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; fd;
        &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; rc;
        &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; gid;
        &lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt; *buf;
        &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; i;

        fd = open(&amp;lt;file&amp;gt;, O_RDWR|O_CREAT|O_TRUNC);
        gid = atoi(argv[1]);
        rc = ioctl(fd, LL_IOC_GROUP_LOCK, gid);
        printf(&lt;span class=&quot;code-quote&quot;&gt;&quot;ioctl %d\n&quot;&lt;/span&gt;, rc);
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (rc) 
                &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; rc;
        buf = malloc(1&amp;lt;&amp;lt;20);
        memset(buf, 1, 1&amp;lt;&amp;lt;20);
        &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; (1)
                write(fd, buf, 1&amp;lt;&amp;lt;20);
        &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; 0;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Run this with a parameter as gid, then evict the client from the OST.&lt;/p&gt;</comment>
                            <comment id="122529" author="jay" created="Wed, 29 Jul 2015 05:13:30 +0000"  >&lt;p&gt;I just update &lt;a href=&quot;http://review.whamcloud.com/14989&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/14989&lt;/a&gt; it should be good for general IO but group lock is not yet supported. Please verify if this is the case on your side.&lt;/p&gt;</comment>
                            <comment id="122584" author="cliffw" created="Wed, 29 Jul 2015 17:54:22 +0000"  >&lt;p&gt;We have hit this on lola, with 2.7.0&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;fff8807e5361e40) refcount = 3
LustreError: 118965:0:(ldlm_resource.c:1377:ldlm_resource_dump()) Granted locks (in reverse order):
Lustre: soaked-OST000f-osc-ffff88082297e800: Connection restored to soaked-OST000f (at 192.168.1.105@o2ib10)
LustreError: 73177:0:(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:
LustreError: 73177:0:(osc_cache.c:3150:discard_cb()) LBUG
Pid: 73177, comm: ldlm_bl_100

Call Trace:
 [&amp;lt;ffffffffa046c895&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
 [&amp;lt;ffffffffa046ce97&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
 [&amp;lt;ffffffffa0ba8046&amp;gt;] discard_cb+0x156/0x190 [osc]
 [&amp;lt;ffffffffa0ba83bc&amp;gt;] osc_page_gang_lookup+0x1ac/0x330 [osc]
 [&amp;lt;ffffffffa0ba7ef0&amp;gt;] ? discard_cb+0x0/0x190 [osc]
 [&amp;lt;ffffffffa0ba8684&amp;gt;] osc_lock_discard_pages+0x144/0x240 [osc]
 [&amp;lt;ffffffffa0ba7ef0&amp;gt;] ? discard_cb+0x0/0x190 [osc]  
 [&amp;lt;ffffffffa0b9ef7b&amp;gt;] osc_lock_flush+0x8b/0x260 [osc]
 [&amp;lt;ffffffffa0b9f3f8&amp;gt;] osc_ldlm_blocking_ast+0x2a8/0x3c0 [osc]
 [&amp;lt;ffffffffa0764a6c&amp;gt;] ldlm_cancel_callback+0x6c/0x170 [ptlrpc]
 [&amp;lt;ffffffffa077732a&amp;gt;] ldlm_cli_cancel_local+0x8a/0x470 [ptlrpc]
 [&amp;lt;ffffffffa077bf40&amp;gt;] ldlm_cli_cancel+0x60/0x360 [ptlrpc]
 [&amp;lt;ffffffffa0b9f22b&amp;gt;] osc_ldlm_blocking_ast+0xdb/0x3c0 [osc]
 [&amp;lt;ffffffffa077f980&amp;gt;] ldlm_handle_bl_callback+0x130/0x400 [ptlrpc]
 [&amp;lt;ffffffffa077fee1&amp;gt;] ldlm_bl_thread_main+0x291/0x3f0 [ptlrpc]
 [&amp;lt;ffffffff81064be0&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa077fc50&amp;gt;] ? ldlm_bl_thread_main+0x0/0x3f0 [ptlrpc]
 [&amp;lt;ffffffff8109e78e&amp;gt;] kthread+0x9e/0xc0
 [&amp;lt;ffffffff8100c28a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109e6f0&amp;gt;] ? kthread+0x0/0xc0
 [&amp;lt;ffffffff8100c280&amp;gt;] ? child_rip+0x0/0x20

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="122585" author="cliffw" created="Wed, 29 Jul 2015 17:55:00 +0000"  >&lt;p&gt;Lustre Log from LBUG&lt;/p&gt;</comment>
                            <comment id="122954" author="lidongyang" created="Sun, 2 Aug 2015 23:20:26 +0000"  >&lt;p&gt;Hi Jinshan,&lt;br/&gt;
The client is stuck after eviction with 14989 patched:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;PID: 15242  TASK: ffff880037cd3520  CPU: 0   COMMAND: &lt;span class=&quot;code-quote&quot;&gt;&quot;ll_imp_inval&quot;&lt;/span&gt;
 #0 [ffff88003de53b60] schedule at ffffffff81529ab0
 #1 [ffff88003de53c38] osc_object_invalidate at ffffffffa103a695 [osc]
 #2 [ffff88003de53cb8] osc_ldlm_resource_invalidate at ffffffffa102b769 [osc]
 #3 [ffff88003de53cf8] cfs_hash_for_each_relax at ffffffffa09726ab [libcfs]
 #4 [ffff88003de53d88] cfs_hash_for_each_nolock at ffffffffa09745ac [libcfs]
 #5 [ffff88003de53db8] osc_import_event at ffffffffa1036556 [osc]
 #6 [ffff88003de53e08] ptlrpc_invalidate_import at ffffffffa0c9d7a1 [ptlrpc]
 #7 [ffff88003de53ec8] ptlrpc_invalidate_import_thread at ffffffffa0ca0608 [ptlrpc]
 #8 [ffff88003de53ee8] kthread at ffffffff8109e78e
 #9 [ffff88003de53f48] kernel_thread at ffffffff8100c28a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I&apos;ve updated &lt;a href=&quot;http://review.whamcloud.com/#/c/14915/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/14915/&lt;/a&gt; now we skip the grouplocks during cleanup so we can reconnect to the servers, purge the extents of the grouplock,&lt;br/&gt;
and if we see any IO from the grouplock fd, just return -ENOLCK.&lt;br/&gt;
The grouplock still needs to be released manually by the user. It&apos;s not ideal but at least the client is not crashing any more.&lt;/p&gt;</comment>
                            <comment id="124916" author="jay" created="Mon, 24 Aug 2015 17:32:48 +0000"  >&lt;p&gt;In that case, let&apos;s land the patch 14915 at least it&apos;s working.&lt;/p&gt;</comment>
                            <comment id="124919" author="jay" created="Mon, 24 Aug 2015 17:43:26 +0000"  >&lt;p&gt;Hi Dongyang,&lt;/p&gt;

&lt;p&gt;Do you have a program to reproduce the problem?&lt;/p&gt;

&lt;p&gt;Jinshan&lt;/p&gt;</comment>
                            <comment id="125786" author="di.wang" created="Mon, 31 Aug 2015 19:53:00 +0000"  >&lt;p&gt;We also found this problem on hyperion test. Here is the debug log. &lt;/p&gt;</comment>
                            <comment id="125891" author="adilger" created="Tue, 1 Sep 2015 17:46:21 +0000"  >&lt;p&gt;Are the two patches here complementary (i.e. both should land) or is only one of the two patches needed?&lt;/p&gt;</comment>
                            <comment id="126073" author="jay" created="Wed, 2 Sep 2015 18:03:00 +0000"  >&lt;p&gt;Hi Andreas, the two patches are exclusive. I would rather land &lt;a href=&quot;http://review.whamcloud.com/14989&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/14989&lt;/a&gt;, but it&apos;s fine for me to land &lt;a href=&quot;http://review.whamcloud.com/#/c/14915&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/14915&lt;/a&gt; if you think it is a better solution.&lt;/p&gt;</comment>
                            <comment id="126160" author="lidongyang" created="Thu, 3 Sep 2015 14:30:20 +0000"  >&lt;p&gt;Hi Jinshan,&lt;br/&gt;
was sick in the past week, sorry for the late reply.&lt;br/&gt;
The reproducer is pretty simple:&lt;br/&gt;
On the client, have some threads doing write to a file, ior would do, and start e.g. 20 of them.&lt;br/&gt;
On the OSS evict the client over and over again, say 10 sec.&lt;br/&gt;
Upon eviction on the client the processes will die as a result, just bump the number up back to 20.&lt;/p&gt;

&lt;p&gt;Keep it running for a while, and it will hit the problem pretty quick.&lt;br/&gt;
BTW with patch set 4 from &lt;a href=&quot;http://review.whamcloud.com/14989&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/14989&lt;/a&gt; the client still stuck at one point:&lt;br/&gt;
crash&amp;gt; bt 11413&lt;br/&gt;
PID: 11413  TASK: ffff88003b1e0ab0  CPU: 0   COMMAND: &quot;ll_imp_inval&quot;&lt;br/&gt;
 #0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59b60&amp;#93;&lt;/span&gt; schedule at ffffffff81529ab0&lt;br/&gt;
 #1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59c38&amp;#93;&lt;/span&gt; osc_object_invalidate at ffffffffa090468c &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #2 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59cb8&amp;#93;&lt;/span&gt; osc_ldlm_resource_invalidate at ffffffffa08f5769 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #3 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59cf8&amp;#93;&lt;/span&gt; cfs_hash_for_each_relax at ffffffffa022865b &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #4 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59d88&amp;#93;&lt;/span&gt; cfs_hash_for_each_nolock at ffffffffa022a53c &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #5 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59db8&amp;#93;&lt;/span&gt; osc_import_event at ffffffffa0900546 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #6 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59e08&amp;#93;&lt;/span&gt; ptlrpc_invalidate_import at ffffffffa055fcb1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #7 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59ec8&amp;#93;&lt;/span&gt; ptlrpc_invalidate_import_thread at ffffffffa0562b18 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #8 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59ee8&amp;#93;&lt;/span&gt; kthread at ffffffff8109e78e&lt;br/&gt;
 #9 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff88003bd59f48&amp;#93;&lt;/span&gt; kernel_thread at ffffffff8100c28a&lt;/p&gt;

&lt;p&gt;Seems it won&apos;t reconnect to the OSS.&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;</comment>
                            <comment id="126688" author="jay" created="Tue, 8 Sep 2015 17:52:23 +0000"  >&lt;p&gt;Hi Dongyang,&lt;/p&gt;

&lt;p&gt;I have been running this test program for over 10 hours on my testing node but I didn&apos;t reproduce this problem. I did this test on my VM nodes so this may be one of reasons why I couldn&apos;t reproduce it. Will you please reproduce it again and collect lustre logs and output of dmesg when you see the problem? Also it will be helpful to take a crashdump and then upload it, along with lustre modules, to our ftp site at: ftp.whamcloud.com so that I can take a look.&lt;/p&gt;

&lt;p&gt;Thanks in advance,&lt;br/&gt;
Jinshan&lt;/p&gt;</comment>
                            <comment id="127312" author="lidongyang" created="Tue, 15 Sep 2015 07:11:14 +0000"  >&lt;p&gt;Hi Jinshan,&lt;br/&gt;
I finally got some time to generate the dump for you. I&apos;m using patchset 5 of &lt;a href=&quot;http://review.whamcloud.com/#/c/14989/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/14989/&lt;/a&gt; on top of master.&lt;br/&gt;
While I was trying to reproduce the problem, I got a client crash. I don&apos;t know if it&apos;s related but I got the dump as well.&lt;/p&gt;

&lt;p&gt;I tried to upload them to ftp.whamcloud.com/uploads. the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt;_pkgs.tar.gz was uploaded fine, which has all the lustre modules and debuginfo I used to reproduce the problem.&lt;br/&gt;
But when I tried to upload other 2 tarballs(crashdump of the crash and stuck), the transfer stalled despite multiple retries.&lt;br/&gt;
Is there another way to upload those files to you?&lt;/p&gt;

&lt;p&gt;Thanks&lt;br/&gt;
Dongyang&lt;/p&gt;</comment>
                            <comment id="127377" author="jay" created="Tue, 15 Sep 2015 18:12:38 +0000"  >&lt;p&gt;Another way would be to upload it to dropbox or google drive and share it to me. Please encrypt it if the crash dump has sensitive data and email me the password.&lt;/p&gt;</comment>
                            <comment id="127393" author="simmonsja" created="Tue, 15 Sep 2015 20:16:29 +0000"  >&lt;p&gt;We just ran into this during our large scale testing on titan today.&lt;/p&gt;</comment>
                            <comment id="127432" author="gerrit" created="Wed, 16 Sep 2015 01:06:21 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/14989/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/14989/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; osc: handle osc eviction correctly&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 8f01f8b51d114b0d2d54a5ab7db3161782e52447&lt;/p&gt;</comment>
                            <comment id="127455" author="lidongyang" created="Wed, 16 Sep 2015 08:21:45 +0000"  >&lt;p&gt;I reckon it is too soon to land &lt;a href=&quot;http://review.whamcloud.com/14989/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/14989/&lt;/a&gt;&lt;br/&gt;
With the patch the client still crashes when I run the reproducer, for example:&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 15348:0:(ldlm_resource.c:835:ldlm_resource_complain()) testfs-OST0000-osc-ffff880037fea000: namespace resource &lt;span class=&quot;error&quot;&gt;&amp;#91;0x302:0x0:0x0&amp;#93;&lt;/span&gt;.0 (ffff88003deae500) refcount no&lt;br/&gt;
nzero (1) after lock cleanup; forcing cleanup.&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 15348:0:(ldlm_resource.c:1450:ldlm_resource_dump()) &amp;#8212; Resource: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x302:0x0:0x0&amp;#93;&lt;/span&gt;.0 (ffff88003deae500) refcount = 2&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 15348:0:(ldlm_resource.c:1453:ldlm_resource_dump()) Granted locks (in reverse order):&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 15348:0:(ldlm_resource.c:1456:ldlm_resource_dump()) ### ### ns: testfs-OST0000-osc-ffff880037fea000 lock: ffff880037e29940/0xaa5a0b4e389da92d lrc: 4/0,1 mode: &lt;br/&gt;
PW/PW res: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x302:0x0:0x0&amp;#93;&lt;/span&gt;.0 rrc: 2 type: EXT &lt;span class=&quot;error&quot;&gt;&amp;#91;0-&amp;gt;18446744073709551615&amp;#93;&lt;/span&gt; (req 0-&amp;gt;1048575) flags: 0x126400000000 nid: local remote: 0x59a587f3f1d1130e expref: -99 pid: 15181 tim&lt;br/&gt;
eout: 0 lvb_type: 1&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 15348:0:(ldlm_resource.c:1450:ldlm_resource_dump()) &amp;#8212; Resource: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x302:0x0:0x0&amp;#93;&lt;/span&gt;.0 (ffff88003deae500) refcount = 2&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 15348:0:(ldlm_resource.c:1453:ldlm_resource_dump()) Granted locks (in reverse order):&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_cache.c:3141:discard_cb()) page@ffff880038c3fa00&lt;span class=&quot;error&quot;&gt;&amp;#91;3 ffff88003ac29b38 1 0 1 ffff88003a78ebb8 (null)&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_cache.c:3141:discard_cb()) vvp-page@ffff880038c3fa68(0:0) vm@ffffea000071dfb8 2000000000087f 2:0 ffff880038c3fa00 21504 lru&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_cache.c:3141:discard_cb()) lov-page@ffff880038c3faa8, raid0&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_cache.c:3141:discard_cb()) osc-page@ffff880038c3fb10 21504: 1&amp;lt; 0x845fed 258 0 + - &amp;gt; 2&amp;lt; 88080384 0 4096 0x0 0x520 | (null) ffff88003ac58500 ffff880&lt;br/&gt;
03cfdbe60 &amp;gt; 3&amp;lt; 1 0 0 &amp;gt; 4&amp;lt; 0 10 8 18446744073619427328 + | + - + - &amp;gt; 5&amp;lt; + - + - | 0 - | 6663 - -&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_cache.c:3141:discard_cb()) end page@ffff880038c3fa00&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_cache.c:3141:discard_cb()) discard dirty page?&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_cache.c:2454:osc_teardown_async_page()) extent ffff88003da0eb10@&lt;/p&gt;
{[21504 -&amp;gt; 21759/21759], [2|0|-|cache|wi|ffff88003cfdbe60], [1048576|256|+|-|ffff8
8003c75d680|256|(null)]}
&lt;p&gt; trunc at 21504.&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_cache.c:2454:osc_teardown_async_page()) ### extent: ffff88003da0eb10&lt;br/&gt;
&amp;lt;3&amp;gt; ns: testfs-OST0000-osc-ffff880037fea000 lock: ffff88003c75d680/0xaa5a0b4e389daa3e lrc: 68/0,6 mode: PW/PW res: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x302:0x0:0x0&amp;#93;&lt;/span&gt;.0 rrc: 2 type: EXT &lt;span class=&quot;error&quot;&gt;&amp;#91;0-&amp;gt;18446744073709551615&amp;#93;&lt;/span&gt;&lt;br/&gt;
 (req 70254592-&amp;gt;71303167) flags: 0x20000000000 nid: local remote: 0x59a587f3f1d11323 expref: -99 pid: 15246 timeout: 0 lvb_type: 1&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_page.c:307:osc_page_delete()) page@ffff880038c3fa00&lt;span class=&quot;error&quot;&gt;&amp;#91;3 ffff88003ac29b38 4 0 1 (null) (null)&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_page.c:307:osc_page_delete()) vvp-page@ffff880038c3fa68(0:0) vm@ffffea000071dfb8 2000000000087f 2:0 ffff880038c3fa00 21504 lru&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_page.c:307:osc_page_delete()) lov-page@ffff880038c3faa8, raid0&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_page.c:307:osc_page_delete()) osc-page@ffff880038c3fb10 21504: 1&amp;lt; 0x845fed 258 0 + - &amp;gt; 2&amp;lt; 88080384 0 4096 0x0 0x520 | (null) ffff88003ac58500 ffff&lt;br/&gt;
88003cfdbe60 &amp;gt; 3&amp;lt; 0 0 0 &amp;gt; 4&amp;lt; 0 9 8 18446744073619361792 + | + - + - &amp;gt; 5&amp;lt; + - + - | 0 - | 6679 - -&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_page.c:307:osc_page_delete()) end page@ffff880038c3fa00&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 12670:0:(osc_page.c:307:osc_page_delete()) Trying to teardown failed: -16&lt;br/&gt;
&amp;lt;0&amp;gt;LustreError: 12670:0:(osc_page.c:308:osc_page_delete()) ASSERTION( 0 ) failed: &lt;br/&gt;
&amp;lt;0&amp;gt;LustreError: 12670:0:(osc_page.c:308:osc_page_delete()) LBUG&lt;br/&gt;
&amp;lt;4&amp;gt;Pid: 12670, comm: ldlm_bl_00&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;br/&gt;
&amp;lt;4&amp;gt;Call Trace:&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa021d875&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa021de77&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa090dbfe&amp;gt;&amp;#93;&lt;/span&gt; osc_page_delete+0x46e/0x4e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa037e9dd&amp;gt;&amp;#93;&lt;/span&gt; cl_page_delete0+0x7d/0x210 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa037ebad&amp;gt;&amp;#93;&lt;/span&gt; cl_page_delete+0x3d/0x110 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0837d0d&amp;gt;&amp;#93;&lt;/span&gt; ll_invalidatepage+0x8d/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0846da5&amp;gt;&amp;#93;&lt;/span&gt; vvp_page_discard+0xc5/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa037cfd8&amp;gt;&amp;#93;&lt;/span&gt; cl_page_invoid+0x68/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa037d0e3&amp;gt;&amp;#93;&lt;/span&gt; cl_page_discard+0x13/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0919678&amp;gt;&amp;#93;&lt;/span&gt; discard_cb+0x88/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa091946e&amp;gt;&amp;#93;&lt;/span&gt; osc_page_gang_lookup+0x1ae/0x330 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09195f0&amp;gt;&amp;#93;&lt;/span&gt; ? discard_cb+0x0/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0919914&amp;gt;&amp;#93;&lt;/span&gt; osc_lock_discard_pages+0x144/0x240 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09195f0&amp;gt;&amp;#93;&lt;/span&gt; ? discard_cb+0x0/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa090ff7b&amp;gt;&amp;#93;&lt;/span&gt; osc_lock_flush+0x8b/0x260 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09103f8&amp;gt;&amp;#93;&lt;/span&gt; osc_ldlm_blocking_ast+0x2a8/0x3c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa050e9dc&amp;gt;&amp;#93;&lt;/span&gt; ldlm_cancel_callback+0x6c/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa052190a&amp;gt;&amp;#93;&lt;/span&gt; ldlm_cli_cancel_local+0x8a/0x470 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0526540&amp;gt;&amp;#93;&lt;/span&gt; ldlm_cli_cancel+0x60/0x360 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa091022b&amp;gt;&amp;#93;&lt;/span&gt; osc_ldlm_blocking_ast+0xdb/0x3c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa052a440&amp;gt;&amp;#93;&lt;/span&gt; ldlm_handle_bl_callback+0x130/0x400 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa052af24&amp;gt;&amp;#93;&lt;/span&gt; ldlm_bl_thread_main+0x484/0x700 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810672b0&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa052aaa0&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_bl_thread_main+0x0/0x700 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a101e&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x9e/0xc0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c28a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a0f80&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xc0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c280&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;br/&gt;
&amp;lt;0&amp;gt;Kernel panic - not syncing: LBUG&lt;br/&gt;
&amp;lt;4&amp;gt;Pid: 12670, comm: ldlm_bl_00 Not tainted 2.6.32-573.3.1.el6.x86_64 #1&lt;br/&gt;
&amp;lt;4&amp;gt;Call Trace:&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81537c54&amp;gt;&amp;#93;&lt;/span&gt; ? panic+0xa7/0x16f&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa021decb&amp;gt;&amp;#93;&lt;/span&gt; ? lbug_with_loc+0x9b/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa090dbfe&amp;gt;&amp;#93;&lt;/span&gt; ? osc_page_delete+0x46e/0x4e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa037e9dd&amp;gt;&amp;#93;&lt;/span&gt; ? cl_page_delete0+0x7d/0x210 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa037ebad&amp;gt;&amp;#93;&lt;/span&gt; ? cl_page_delete+0x3d/0x110 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0837d0d&amp;gt;&amp;#93;&lt;/span&gt; ? ll_invalidatepage+0x8d/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0846da5&amp;gt;&amp;#93;&lt;/span&gt; ? vvp_page_discard+0xc5/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa037cfd8&amp;gt;&amp;#93;&lt;/span&gt; ? cl_page_invoid+0x68/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa037d0e3&amp;gt;&amp;#93;&lt;/span&gt; ? cl_page_discard+0x13/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0919678&amp;gt;&amp;#93;&lt;/span&gt; ? discard_cb+0x88/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa091946e&amp;gt;&amp;#93;&lt;/span&gt; ? osc_page_gang_lookup+0x1ae/0x330 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09195f0&amp;gt;&amp;#93;&lt;/span&gt; ? discard_cb+0x0/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0919914&amp;gt;&amp;#93;&lt;/span&gt; ? osc_lock_discard_pages+0x144/0x240 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09195f0&amp;gt;&amp;#93;&lt;/span&gt; ? discard_cb+0x0/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa090ff7b&amp;gt;&amp;#93;&lt;/span&gt; ? osc_lock_flush+0x8b/0x260 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09103f8&amp;gt;&amp;#93;&lt;/span&gt; ? osc_ldlm_blocking_ast+0x2a8/0x3c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa050e9dc&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_cancel_callback+0x6c/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa052190a&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_cli_cancel_local+0x8a/0x470 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0526540&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_cli_cancel+0x60/0x360 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa091022b&amp;gt;&amp;#93;&lt;/span&gt; ? osc_ldlm_blocking_ast+0xdb/0x3c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa052a440&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_handle_bl_callback+0x130/0x400 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa052af24&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_bl_thread_main+0x484/0x700 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810672b0&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa052aaa0&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_bl_thread_main+0x0/0x700 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a101e&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x9e/0xc0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c28a&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0xa/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a0f80&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xc0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c280&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;

&lt;p&gt;and another one, however don&apos;t know if it&apos;s related.&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 4278:0:(ldlm_resource.c:835:ldlm_resource_complain()) testfs-OST0001-osc-ffff88003c715c00: namespace resource &lt;span class=&quot;error&quot;&gt;&amp;#91;0x302:0x0:0x0&amp;#93;&lt;/span&gt;.0 (ffff880037015800) refcount non&lt;br/&gt;
zero (1) after lock cleanup; forcing cleanup.&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 4278:0:(ldlm_resource.c:1450:ldlm_resource_dump()) &amp;#8212; Resource: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x302:0x0:0x0&amp;#93;&lt;/span&gt;.0 (ffff880037015800) refcount = 2&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 4278:0:(ldlm_resource.c:1453:ldlm_resource_dump()) Granted locks (in reverse order):&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 4278:0:(ldlm_resource.c:1456:ldlm_resource_dump()) ### ### ns: testfs-OST0001-osc-ffff88003c715c00 lock: ffff880037269380/0x532f2161a177e283 lrc: 19/0,1 mode: &lt;br/&gt;
PW/PW res: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x302:0x0:0x0&amp;#93;&lt;/span&gt;.0 rrc: 2 type: EXT &lt;span class=&quot;error&quot;&gt;&amp;#91;0-&amp;gt;18446744073709551615&amp;#93;&lt;/span&gt; (req 0-&amp;gt;1048575) flags: 0x126400000000 nid: local remote: 0x59a587f3f1d100b5 expref: -99 pid: 4040 time&lt;br/&gt;
out: 0 lvb_type: 1&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 790:0:(osc_io.c:1010:osc_req_attr_set()) page@ffff880022f02600&lt;span class=&quot;error&quot;&gt;&amp;#91;2 ffff88003be7db38 2 0 1 (null) ffff88003df849c0&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 789:0:(osc_io.c:1010:osc_req_attr_set()) page@ffff88003dfc8000&lt;span class=&quot;error&quot;&gt;&amp;#91;2 ffff88003be7db38 2 0 1 (null) ffff88003b3982c0&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 789:0:(osc_io.c:1010:osc_req_attr_set()) vvp-page@ffff88003dfc8068(0:0) vm@ffffea0000785428 2000000000282c 2:0 ffff88003dfc8000 19200 lru&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 789:0:(osc_io.c:1010:osc_req_attr_set()) lov-page@ffff88003dfc80a8, raid0&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 789:0:(osc_io.c:1010:osc_req_attr_set()) osc-page@ffff88003dfc8110 19200: 1&amp;lt; 0x845fed 258 0 + + &amp;gt; 2&amp;lt; 78643200 0 4096 0x5 0x520 | (null) ffff88003d970540 ffff88&lt;br/&gt;
003bc81e20 &amp;gt; 3&amp;lt; 1 12 0 &amp;gt; 4&amp;lt; 0 7 8 18446744073678077952 - | - - - - &amp;gt; 5&amp;lt; - - - - | 0 - | 0 - -&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 789:0:(osc_io.c:1010:osc_req_attr_set()) end page@ffff88003dfc8000&lt;br/&gt;
&amp;lt;3&amp;gt;&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 789:0:(osc_io.c:1010:osc_req_attr_set()) uncovered page!&lt;br/&gt;
&amp;lt;3&amp;gt;LustreError: 789:0:(ldlm_resource.c:1450:ldlm_resource_dump()) &amp;#8212; Resource: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x302:0x0:0x0&amp;#93;&lt;/span&gt;.0 (ffff880037015800) refcount = 3&lt;br/&gt;
&amp;lt;4&amp;gt;Pid: 789, comm: ptlrpcd_00_00&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;br/&gt;
&amp;lt;4&amp;gt;Call Trace:&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa021d875&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09133fa&amp;gt;&amp;#93;&lt;/span&gt; osc_req_attr_set+0x55a/0x720 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0383829&amp;gt;&amp;#93;&lt;/span&gt; cl_req_attr_set+0xc9/0x220 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0904082&amp;gt;&amp;#93;&lt;/span&gt; osc_build_rpc+0x882/0x12d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa091f623&amp;gt;&amp;#93;&lt;/span&gt; osc_io_unplug0+0x1133/0x1af0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0918428&amp;gt;&amp;#93;&lt;/span&gt; ? osc_ap_completion+0x1a8/0x550 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0917a3e&amp;gt;&amp;#93;&lt;/span&gt; ? osc_extent_put+0xbe/0x260 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0374f75&amp;gt;&amp;#93;&lt;/span&gt; ? lu_object_put+0x135/0x3b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09224b0&amp;gt;&amp;#93;&lt;/span&gt; osc_io_unplug+0x10/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0905593&amp;gt;&amp;#93;&lt;/span&gt; brw_interpret+0xac3/0x2320 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0546ee2&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_unregister_bulk+0xa2/0xac0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa053f4bc&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_unregister_reply+0x6c/0x810 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa053e2a4&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_send_new_req+0x154/0x980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0540551&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_check_set+0x331/0x1be0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa056e443&amp;gt;&amp;#93;&lt;/span&gt; ptlrpcd_check+0x3d3/0x610 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa056e8fa&amp;gt;&amp;#93;&lt;/span&gt; ptlrpcd+0x27a/0x500 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810672b0&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa056e680&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpcd+0x0/0x500 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a101e&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x9e/0xc0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c28a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a0f80&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xc0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c280&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;br/&gt;
&amp;lt;0&amp;gt;LustreError: 789:0:(osc_io.c:1020:osc_req_attr_set()) LBUG&lt;br/&gt;
&amp;lt;4&amp;gt;Pid: 789, comm: ptlrpcd_00_00&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;br/&gt;
&amp;lt;4&amp;gt;Call Trace:&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa021d875&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa021de77&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0913410&amp;gt;&amp;#93;&lt;/span&gt; osc_req_attr_set+0x570/0x720 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0383829&amp;gt;&amp;#93;&lt;/span&gt; cl_req_attr_set+0xc9/0x220 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0904082&amp;gt;&amp;#93;&lt;/span&gt; osc_build_rpc+0x882/0x12d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa091f623&amp;gt;&amp;#93;&lt;/span&gt; osc_io_unplug0+0x1133/0x1af0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0918428&amp;gt;&amp;#93;&lt;/span&gt; ? osc_ap_completion+0x1a8/0x550 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0917a3e&amp;gt;&amp;#93;&lt;/span&gt; ? osc_extent_put+0xbe/0x260 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0374f75&amp;gt;&amp;#93;&lt;/span&gt; ? lu_object_put+0x135/0x3b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09224b0&amp;gt;&amp;#93;&lt;/span&gt; osc_io_unplug+0x10/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0905593&amp;gt;&amp;#93;&lt;/span&gt; brw_interpret+0xac3/0x2320 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0546ee2&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_unregister_bulk+0xa2/0xac0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa053f4bc&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_unregister_reply+0x6c/0x810 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa053e2a4&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_send_new_req+0x154/0x980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0540551&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_check_set+0x331/0x1be0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa056e443&amp;gt;&amp;#93;&lt;/span&gt; ptlrpcd_check+0x3d3/0x610 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa056e8fa&amp;gt;&amp;#93;&lt;/span&gt; ptlrpcd+0x27a/0x500 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810672b0&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa056e680&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpcd+0x0/0x500 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a101e&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x9e/0xc0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c28a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a0f80&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xc0&lt;br/&gt;
&amp;lt;4&amp;gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c280&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;</comment>
                            <comment id="127547" author="gerrit" created="Wed, 16 Sep 2015 19:59:23 +0000"  >&lt;p&gt;Jinshan Xiong (jinshan.xiong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/16456&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16456&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; osc: further OSC cleanup after eviction&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 9dd22234a098bb2bea26c4694d91edc928e027ac&lt;/p&gt;</comment>
                            <comment id="127549" author="jay" created="Wed, 16 Sep 2015 20:01:20 +0000"  >&lt;p&gt;Hi Dongyang,&lt;/p&gt;

&lt;p&gt;Please try patch 16456 and see if it can fix the problem.&lt;/p&gt;

&lt;p&gt;Jinshan&lt;/p&gt;</comment>
                            <comment id="127741" author="lidongyang" created="Fri, 18 Sep 2015 03:41:03 +0000"  >&lt;p&gt;Hi Jinshan,&lt;br/&gt;
I&apos;m using master which has 14989 landed already, plus patch set 3 of 16456:&lt;br/&gt;
The client crashed 5mins after I started the reproducer:&lt;br/&gt;
Sep 18 13:29:00 client kernel: LustreError: 16824:0:(ldlm_resource.c:887:ldlm_resource_complain()) testfs-OST0000-osc-ffff8800375c4000: namespace resource &lt;span class=&quot;error&quot;&gt;&amp;#91;0x303:0x0:0x0&amp;#93;&lt;/span&gt;.0 (ffff88003d2abcc0) refcount nonzero (1) after lock cleanup; forcing cleanup.&lt;br/&gt;
Sep 18 13:29:00 client kernel: LustreError: 16824:0:(ldlm_resource.c:1502:ldlm_resource_dump()) &amp;#8212; Resource: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x303:0x0:0x0&amp;#93;&lt;/span&gt;.0 (ffff88003d2abcc0) refcount = 3&lt;br/&gt;
Sep 18 13:29:00 client kernel: LustreError: 16824:0:(ldlm_resource.c:1505:ldlm_resource_dump()) Granted locks (in reverse order):&lt;br/&gt;
Sep 18 13:29:00 client kernel: LustreError: 16824:0:(ldlm_resource.c:1508:ldlm_resource_dump()) ### ### ns: testfs-OST0000-osc-ffff8800375c4000 lock: ffff88000b69d380/0x2104da78ff029aa9 lrc: 8/0,1 mode: PW/PW res: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x303:0x0:0x0&amp;#93;&lt;/span&gt;.0 rrc: 3 type: EXT &lt;span class=&quot;error&quot;&gt;&amp;#91;0-&amp;gt;18446744073709551615&amp;#93;&lt;/span&gt; (req 0-&amp;gt;1048575) flags: 0x126400000000 nid: local remote: 0x59a587f3f1d1d697 expref: -99 pid: 16588 timeout: 0 lvb_type: 1&lt;br/&gt;
Sep 18 13:29:00 client kernel: LustreError: 16824:0:(ldlm_resource.c:1502:ldlm_resource_dump()) &amp;#8212; Resource: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x303:0x0:0x0&amp;#93;&lt;/span&gt;.0 (ffff88003d2abcc0) refcount = 2&lt;br/&gt;
Sep 18 13:29:00 client kernel: LustreError: 16824:0:(ldlm_resource.c:1505:ldlm_resource_dump()) Granted locks (in reverse order):&lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_cache.c:3134:discard_cb()) page@ffff88003d5a7600&lt;span class=&quot;error&quot;&gt;&amp;#91;3 ffff88003a513b38 1 0 1 ffff88003a5d6b78&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_cache.c:3134:discard_cb()) vvp-page@ffff88003d5a7650(0:0) vm@ffffea00004439e0 2000000000087d 3:0 ffff88003d5a7600 25088 lru&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_cache.c:3134:discard_cb()) lov-page@ffff88003d5a7690, raid0&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_cache.c:3134:discard_cb()) osc-page@ffff88003d5a76f8 25088: 1&amp;lt; 0x845fed 258 0 + - &amp;gt; 2&amp;lt; 102760448 0 4096 0x0 0x520 | (null) ffff88003c479500 ffff88003a517e20 &amp;gt; 3&amp;lt; 1 0 0 &amp;gt; 4&amp;lt; 0 9 8 18446744073642434560 + | + - + - &amp;gt; 5&amp;lt; + - + - | 0 - | 5635 - -&amp;gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_cache.c:3134:discard_cb()) end page@ffff88003d5a7600&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_cache.c:3134:discard_cb()) discard dirty page?&lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_cache.c:2447:osc_teardown_async_page()) extent ffff880039a18720@&lt;/p&gt;
{[25088 -&amp;gt; 25341/25343], [2|0|+|locking|wi|ffff88003a517e20], [1040384|254|+|-|ffff88000b69d580|256|ffff8800372b3520]}
&lt;p&gt; trunc at 25088.&lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_cache.c:2447:osc_teardown_async_page()) ### extent: ffff880039a18720&lt;br/&gt;
Sep 18 13:29:01 client kernel: ns: testfs-OST0000-osc-ffff8800375c4000 lock: ffff88000b69d580/0x2104da78ff029c0e lrc: 41/0,3 mode: PW/PW res: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x303:0x0:0x0&amp;#93;&lt;/span&gt;.0 rrc: 2 type: EXT &lt;span class=&quot;error&quot;&gt;&amp;#91;0-&amp;gt;18446744073709551615&amp;#93;&lt;/span&gt; (req 102760448-&amp;gt;103809023) flags: 0x20000000000 nid: local remote: 0x59a587f3f1d1d6ac expref: -99 pid: 16683 timeout: 0 lvb_type: 1&lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_page.c:307:osc_page_delete()) page@ffff88003d5a7600&lt;span class=&quot;error&quot;&gt;&amp;#91;3 ffff88003a513b38 4 0 1 (null)&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_page.c:307:osc_page_delete()) vvp-page@ffff88003d5a7650(0:0) vm@ffffea00004439e0 2000000000087d 3:0 ffff88003d5a7600 25088 lru&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_page.c:307:osc_page_delete()) lov-page@ffff88003d5a7690, raid0&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_page.c:307:osc_page_delete()) osc-page@ffff88003d5a76f8 25088: 1&amp;lt; 0x845fed 258 0 + - &amp;gt; 2&amp;lt; 102760448 0 4096 0x0 0x520 | (null) ffff88003c479500 ffff88003a517e20 &amp;gt; 3&amp;lt; 0 0 0 &amp;gt; 4&amp;lt; 0 9 8 18446744073642434560 + | + - + - &amp;gt; 5&amp;lt; + - + - | 0 - | 5635 - -&amp;gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_page.c:307:osc_page_delete()) end page@ffff88003d5a7600&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_page.c:307:osc_page_delete()) Trying to teardown failed: -16&lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_page.c:308:osc_page_delete()) ASSERTION( 0 ) failed: &lt;/p&gt;

&lt;p&gt;Message from syslogd@client at Sep 18 13:29:01 ...&lt;br/&gt;
 kernel:LustreError: 13392:0:(osc_page.c:308:osc_page_delete()) ASSERTION( 0 ) failed: &lt;br/&gt;
Sep 18 13:29:01 client kernel: LustreError: 13392:0:(osc_page.c:308:osc_page_delete()) LBUG&lt;br/&gt;
Sep 18 13:29:01 client kernel: Pid: 13392, comm: ldlm_bl_00&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;br/&gt;
Sep 18 13:29:01 client kernel: Call Trace:&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa10ad875&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa10ade77&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa178bf9e&amp;gt;&amp;#93;&lt;/span&gt; osc_page_delete+0x46e/0x4e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa12019cd&amp;gt;&amp;#93;&lt;/span&gt; cl_page_delete0+0x7d/0x210 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa1201b9d&amp;gt;&amp;#93;&lt;/span&gt; cl_page_delete+0x3d/0x110 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa16b6d2d&amp;gt;&amp;#93;&lt;/span&gt; ll_invalidatepage+0x8d/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa16c5d85&amp;gt;&amp;#93;&lt;/span&gt; vvp_page_discard+0xc5/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa11fffc8&amp;gt;&amp;#93;&lt;/span&gt; cl_page_invoid+0x68/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa12000d3&amp;gt;&amp;#93;&lt;/span&gt; cl_page_discard+0x13/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa1797158&amp;gt;&amp;#93;&lt;/span&gt; discard_cb+0x88/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa1796f4e&amp;gt;&amp;#93;&lt;/span&gt; osc_page_gang_lookup+0x1ae/0x330 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa17970d0&amp;gt;&amp;#93;&lt;/span&gt; ? discard_cb+0x0/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa17973f4&amp;gt;&amp;#93;&lt;/span&gt; osc_lock_discard_pages+0x144/0x240 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa17970d0&amp;gt;&amp;#93;&lt;/span&gt; ? discard_cb+0x0/0x1e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa178e45b&amp;gt;&amp;#93;&lt;/span&gt; osc_lock_flush+0x8b/0x260 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa178e8d8&amp;gt;&amp;#93;&lt;/span&gt; osc_ldlm_blocking_ast+0x2a8/0x3c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa138ea57&amp;gt;&amp;#93;&lt;/span&gt; ldlm_cancel_callback+0x87/0x280 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81060530&amp;gt;&amp;#93;&lt;/span&gt; ? __dequeue_entity+0x30/0x50&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100969d&amp;gt;&amp;#93;&lt;/span&gt; ? __switch_to+0x7d/0x340&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa13a184a&amp;gt;&amp;#93;&lt;/span&gt; ldlm_cli_cancel_local+0x8a/0x470 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa13a6480&amp;gt;&amp;#93;&lt;/span&gt; ldlm_cli_cancel+0x60/0x360 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa178e70b&amp;gt;&amp;#93;&lt;/span&gt; osc_ldlm_blocking_ast+0xdb/0x3c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa13aa380&amp;gt;&amp;#93;&lt;/span&gt; ldlm_handle_bl_callback+0x130/0x400 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa13aae64&amp;gt;&amp;#93;&lt;/span&gt; ldlm_bl_thread_main+0x484/0x700 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810672b0&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa13aa9e0&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_bl_thread_main+0x0/0x700 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810a101e&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x9e/0xc0&lt;br/&gt;
Sep 18 13:29:01 client kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c28a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;/p&gt;</comment>
                            <comment id="127750" author="jay" created="Fri, 18 Sep 2015 06:03:24 +0000"  >&lt;p&gt;Hi Dongyang,&lt;/p&gt;

&lt;p&gt;it certainly lasted longer on my VM node &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/wink.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;

&lt;p&gt;Please upload your reproducer and I will restart working on this issue once I get access to a real hardware.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;/p&gt;</comment>
                            <comment id="127757" author="lidongyang" created="Fri, 18 Sep 2015 06:38:59 +0000"  >&lt;p&gt;Hi Jinshan,&lt;br/&gt;
Hardware shouldn&apos;t matter as I&apos;m reproducing it on my VMs.&lt;br/&gt;
Here are the scripts I used.&lt;br/&gt;
On client, I&apos;m using IOR and when IOR dies because of eviction, just bring up new ones:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#!/bin/sh
&lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;true&lt;/span&gt;
&lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt;
	num=$(ps aux | grep IOR | wc -l)
	&lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; [ $num -lt 20 ]
	&lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt;
		/root/IOR/src/C/IOR -b 8g -w -e -E -t 1m -v -k -o /mnt/testfile &amp;amp;
		num=$(ps aux | grep IOR | wc -l)
	done
	sleep 1
done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Here I got 20 of them running at the same time.&lt;/p&gt;

&lt;p&gt;On the OSS:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#!/bin/sh
&lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;true&lt;/span&gt;
&lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt;	
	&lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; ost in /proc/fs/lustre/obdfilter/*
	&lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt;
		&lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; [ -n $(cat $ost/exports/&amp;lt;your client nid&amp;gt;/uuid) ]
		then
			echo $(cat $ost/exports/&amp;lt;your client nid&amp;gt;/uuid) &amp;gt; $ost/evict_client
		fi
	done
	sleep 10
done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;As you can see I&apos;m evicting the client over and over, and I&apos;m using 10secs as the interval. Again you can change this.&lt;/p&gt;

&lt;p&gt;Thanks&lt;br/&gt;
Dongyang&lt;/p&gt;</comment>
                            <comment id="127759" author="jay" created="Fri, 18 Sep 2015 06:58:13 +0000"  >&lt;p&gt;I did very similar thing with iozone but they wrote to different files, let me try your reproducer.&lt;/p&gt;</comment>
                            <comment id="127858" author="jay" created="Fri, 18 Sep 2015 22:36:50 +0000"  >&lt;p&gt;Please check the new patch and see if it can fix the problem.&lt;/p&gt;</comment>
                            <comment id="128050" author="lidongyang" created="Tue, 22 Sep 2015 05:44:52 +0000"  >&lt;p&gt;OK I&apos;ve been running the scripts with patchset 5 of 16456 for some time, seems the general IO is fine.&lt;br/&gt;
But when I tried the group lock, the client crashed immediately after eviction:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&amp;lt;0&amp;gt;LustreError: 1697:0:(osc_cache.c:2907:osc_cache_writeback_range()) ASSERTION( !ext-&amp;gt;oe_hp ) failed: 
&amp;lt;0&amp;gt;LustreError: 1697:0:(osc_cache.c:2907:osc_cache_writeback_range()) LBUG
&amp;lt;4&amp;gt;Pid: 1697, comm: ldlm_bl_11
&amp;lt;4&amp;gt;
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa021d875&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa021de77&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0922085&amp;gt;] osc_cache_writeback_range+0x1275/0x1280 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa090b545&amp;gt;] osc_lock_flush+0x175/0x260 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa090b8d8&amp;gt;] osc_ldlm_blocking_ast+0x2a8/0x3c0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa050ba57&amp;gt;] ldlm_cancel_callback+0x87/0x280 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81060530&amp;gt;] ? __dequeue_entity+0x30/0x50
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100969d&amp;gt;] ? __switch_to+0x7d/0x340
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa051e84a&amp;gt;] ldlm_cli_cancel_local+0x8a/0x470 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa05234bc&amp;gt;] ldlm_cli_cancel+0x9c/0x3e0 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa090b70b&amp;gt;] osc_ldlm_blocking_ast+0xdb/0x3c0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810672c2&amp;gt;] ? default_wake_function+0x12/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0527400&amp;gt;] ldlm_handle_bl_callback+0x130/0x400 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0527ee4&amp;gt;] ldlm_bl_thread_main+0x484/0x700 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810672b0&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0527a60&amp;gt;] ? ldlm_bl_thread_main+0x0/0x700 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810a101e&amp;gt;] kthread+0x9e/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c28a&amp;gt;] child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810a0f80&amp;gt;] ? kthread+0x0/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c280&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;4&amp;gt;
&amp;lt;0&amp;gt;Kernel panic - not syncing: LBUG
&amp;lt;4&amp;gt;Pid: 1697, comm: ldlm_bl_11 Not tainted 2.6.32-573.3.1.el6.x86_64 #1
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81537c54&amp;gt;] ? panic+0xa7/0x16f
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa021decb&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0922085&amp;gt;] ? osc_cache_writeback_range+0x1275/0x1280 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa090b545&amp;gt;] ? osc_lock_flush+0x175/0x260 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa090b8d8&amp;gt;] ? osc_ldlm_blocking_ast+0x2a8/0x3c0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa050ba57&amp;gt;] ? ldlm_cancel_callback+0x87/0x280 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81060530&amp;gt;] ? __dequeue_entity+0x30/0x50
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100969d&amp;gt;] ? __switch_to+0x7d/0x340
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa051e84a&amp;gt;] ? ldlm_cli_cancel_local+0x8a/0x470 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa05234bc&amp;gt;] ? ldlm_cli_cancel+0x9c/0x3e0 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa090b70b&amp;gt;] ? osc_ldlm_blocking_ast+0xdb/0x3c0 [osc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810672c2&amp;gt;] ? default_wake_function+0x12/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0527400&amp;gt;] ? ldlm_handle_bl_callback+0x130/0x400 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0527ee4&amp;gt;] ? ldlm_bl_thread_main+0x484/0x700 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810672b0&amp;gt;] ? default_wake_function+0x0/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0527a60&amp;gt;] ? ldlm_bl_thread_main+0x0/0x700 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810a101e&amp;gt;] ? kthread+0x9e/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c28a&amp;gt;] ? child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810a0f80&amp;gt;] ? kthread+0x0/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c280&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="128139" author="jay" created="Tue, 22 Sep 2015 18:37:32 +0000"  >&lt;p&gt;Hi Dongyang, please upload your test cases.&lt;/p&gt;</comment>
                            <comment id="128312" author="lidongyang" created="Wed, 23 Sep 2015 23:26:19 +0000"  >&lt;p&gt;Hi Jinshan,&lt;br/&gt;
See my comment earlier on 28/Jul/15 8:39 PM.&lt;/p&gt;</comment>
                            <comment id="128326" author="jay" created="Thu, 24 Sep 2015 03:58:30 +0000"  >&lt;p&gt;Thanks. Please check if the latest patch can fix the problem&lt;/p&gt;</comment>
                            <comment id="128332" author="lidongyang" created="Thu, 24 Sep 2015 05:32:28 +0000"  >&lt;p&gt;With patch set 6 of 16456, the client won&apos;t crash with the gplck reproducer and single eviction.&lt;br/&gt;
However, the client crashes if:&lt;br/&gt;
1. run the gplck reproducer, evict the client once.&lt;br/&gt;
2. after client reconnects, don&apos;t kill the gplck, run another instance of gplck with a different gid.&lt;br/&gt;
3. evict the client again, the client crashes with the same stack trace ASSERTION( !ext-&amp;gt;oe_hp ) failed&lt;/p&gt;

&lt;p&gt;This makes me think that the problem should be fixed in the ldlm layer like &lt;a href=&quot;http://review.whamcloud.com/#/c/14915/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/14915/&lt;/a&gt;&lt;br/&gt;
not in the osc layer.&lt;br/&gt;
Thanks&lt;br/&gt;
Dongyang&lt;/p&gt;</comment>
                            <comment id="128335" author="jay" created="Thu, 24 Sep 2015 06:08:07 +0000"  >&lt;p&gt;Hi Dongyang, the problem may be fixed in patch set 7 but feel free to use patch 14915 on your own site.&lt;/p&gt;</comment>
                            <comment id="128336" author="jay" created="Thu, 24 Sep 2015 06:22:52 +0000"  >&lt;p&gt;this problem can only happen at eviction so even patch 14989 is good enough for daily use.&lt;/p&gt;</comment>
                            <comment id="128357" author="jay" created="Thu, 24 Sep 2015 14:57:48 +0000"  >&lt;p&gt;Hi Dongyang, I took a look at your latest patch 14915 and I suggest you shouldn&apos;t apply this patch to your own site because it&apos;s not production ready yet.&lt;/p&gt;</comment>
                            <comment id="128998" author="gerrit" created="Thu, 1 Oct 2015 14:22:22 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/16456/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16456/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; osc: further OSC cleanup after eviction&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: e8b421531c166b91ab5c1f417570c544bcdd050c&lt;/p&gt;</comment>
                            <comment id="129400" author="gerrit" created="Tue, 6 Oct 2015 00:51:30 +0000"  >&lt;p&gt;Jinshan Xiong (jinshan.xiong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/16727&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16727&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; osc: check oo_npages inside spin lock&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e220fbbfa93069a783430d15a26dc3518006bc1c&lt;/p&gt;</comment>
                            <comment id="131455" author="gerrit" created="Sat, 24 Oct 2015 00:37:04 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/16727/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16727/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; osc: faulty assertion in osc_object_prune()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: c5e84d53198142919b526fd304c6782005e38bdb&lt;/p&gt;</comment>
                            <comment id="131478" author="pjones" created="Sun, 25 Oct 2015 12:45:50 +0000"  >&lt;p&gt;Landed for 2.8&lt;/p&gt;</comment>
                            <comment id="133591" author="icostelloddn" created="Mon, 16 Nov 2015 11:19:19 +0000"  >&lt;p&gt;This bug is not resolved. At a customer site today, one of the issues reported with the patch series was reproduced:&lt;/p&gt;

&lt;p&gt;Here is the gathered information from one of the engineers:&lt;/p&gt;

&lt;p&gt;Nov 13 21:22:32 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;41261.553520&amp;#93;&lt;/span&gt; LustreError: 11-0: work-OST0018-osc-ffff880f4a556c00: Communicating with 10.148.254.146@o2ib, operation obd_ping failed with -107.&lt;br/&gt;
Nov 13 21:22:32 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;41261.553537&amp;#93;&lt;/span&gt; Lustre: work-OST0018-osc-ffff880f4a556c00: Connection to work-OST0018 (at 10.148.254.146@o2ib) was lost; in progress operations using this service will wait for recovery to complete&lt;br/&gt;
Nov 13 21:22:32 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;41261.553744&amp;#93;&lt;/span&gt; LustreError: 167-0: work-OST0018-osc-ffff880f4a556c00: This client was evicted by work-OST0018; in progress operations using this service will fail.&lt;/p&gt;


&lt;p&gt;Nov 13 21:22:19 oss3 kernel: LustreError: 0:0:(ldlm_lockd.c:344:waiting_locks_callback()) ### lock callback timer expired after 100s: evicting client at 10.148.4.3@o2ib  ns: filter-work-OST0018_UUID lock: ffff881bf46fa1c0/0x1b850f2869999e97 lrc: 3/0,0 mode: PW/PW res: &lt;span class=&quot;error&quot;&gt;&amp;#91;0xcc6a24:0x0:0x0&amp;#93;&lt;/span&gt;.0 rrc: 31 type: EXT &lt;span class=&quot;error&quot;&gt;&amp;#91;0-&amp;gt;18446744073709551615&amp;#93;&lt;/span&gt; (req 0-&amp;gt;18446744073709551615) flags: 0x60000080010020 nid: 10.148.4.3@o2ib remote: 0x7a3c9c39e0502fcc expref: 5 pid: 20187 timeout: 4335795111 lvb_type: 0&lt;/p&gt;

&lt;p&gt;r2i1n4:~ # lctl dl&lt;br/&gt;
  0 UP mgc MGC10.148.254.141@o2ib 2dc4c54f-dab6-1190-fca7-d7747093f936 5&lt;br/&gt;
  &#8230; &lt;br/&gt;
 35 UP osc work-OST0017-osc-ffff880f4a556c00 9d007967-5de8-eb2e-eaf9-99d4b17d74e6 5&lt;br/&gt;
 36 IN osc work-OST0018-osc-ffff880f4a556c00 9d007967-5de8-eb2e-eaf9-99d4b17d74e6 5&lt;br/&gt;
 37 UP osc work-OST0019-osc-ffff880f4a556c00 9d007967-5de8-eb2e-eaf9-99d4b17d74e6 5&lt;/p&gt;

&lt;p&gt;r2i1n4:~ # lctl --device 36 activate&lt;br/&gt;
error: activate: failed: Invalid argument&lt;/p&gt;

&lt;p&gt;r2i1n4:~ # cat /proc/fs/lustre/osc/work-OST0018-osc-ffff880f4a556c00/import&lt;br/&gt;
import:&lt;br/&gt;
    name: work-OST0018-osc-ffff880f4a556c00&lt;br/&gt;
    target: work-OST0018_UUID&lt;br/&gt;
    state: EVICTED&lt;br/&gt;
    instance: 21&lt;br/&gt;
    connect_flags: [ write_grant, server_lock, version, request_portal, truncate_lock, max_byte_per_rpc, early_lock_cancel, adaptive_timeouts, alt_checksum_algorithm, fid_is_enabled, version_recovery, full20, layout_lock, 64bithash, object_max_bytes, jobstats, einprogress, lvb_type]&lt;br/&gt;
    import_flags: [ invalid, replayable, pingable, connect_tried ]&lt;br/&gt;
    connection:&lt;br/&gt;
       failover_nids: [ 10.148.254.146@o2ib, 10.148.254.147@o2ib ]&lt;br/&gt;
       current_connection: 10.148.254.146@o2ib&lt;br/&gt;
       connection_attempts: 2&lt;br/&gt;
       generation: 2&lt;br/&gt;
       in-progress_invalidations: 1&lt;br/&gt;
    rpcs:&lt;br/&gt;
       inflight: 0&lt;br/&gt;
       unregistering: 0&lt;br/&gt;
       timeouts: 0&lt;br/&gt;
       avg_waittime: 803 usec&lt;br/&gt;
    service_estimates:&lt;br/&gt;
       services: 1 sec&lt;br/&gt;
       network: 1 sec&lt;br/&gt;
    transactions:&lt;br/&gt;
       last_replay: 0&lt;br/&gt;
       peer_committed: 90194391109&lt;br/&gt;
       last_checked: 90194391109&lt;br/&gt;
    read_data_averages:&lt;br/&gt;
       bytes_per_rpc: 19550&lt;br/&gt;
       usec_per_rpc: 938&lt;br/&gt;
       MB_per_sec: 20.84&lt;br/&gt;
    write_data_averages:&lt;br/&gt;
       bytes_per_rpc: 508&lt;br/&gt;
       usec_per_rpc: 5386&lt;br/&gt;
       MB_per_sec: 0.09&lt;/p&gt;



&lt;p&gt;r2i1n4:~ # ps -ef  | grep ll_imp_inval&lt;br/&gt;
root       832     2  0 Nov13 ?        00:00:00 &lt;span class=&quot;error&quot;&gt;&amp;#91;ll_imp_inval&amp;#93;&lt;/span&gt;&lt;br/&gt;
root     25849 25765  0 14:18 pts/0    00:00:00 grep ll_imp_inval&lt;/p&gt;

&lt;p&gt;Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.717827&amp;#93;&lt;/span&gt; ll_imp_inval    S ffff881ffc253b00     0   832      2 0x00000000&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.717832&amp;#93;&lt;/span&gt;  ffff881ffc34dc50 0000000000000046 ffff881ffc34c010 0000000000010900&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.717838&amp;#93;&lt;/span&gt;  0000000000010900 0000000000010900 0000000000010900 ffff881ffc34dfd8&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.717845&amp;#93;&lt;/span&gt;  ffff881ffc34dfd8 0000000000010900 ffff881fd7ffa580 ffff881020f2c300&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.717852&amp;#93;&lt;/span&gt; Call Trace:&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.717879&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bcf361&amp;gt;&amp;#93;&lt;/span&gt; osc_object_invalidate+0x1c1/0x280 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.717907&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb5bae&amp;gt;&amp;#93;&lt;/span&gt; osc_ldlm_resource_invalidate+0xae/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.717930&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0716d68&amp;gt;&amp;#93;&lt;/span&gt; cfs_hash_for_each_relax+0x178/0x340 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.717971&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0718fc0&amp;gt;&amp;#93;&lt;/span&gt; cfs_hash_for_each_nolock+0x70/0x1c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.718008&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bc989c&amp;gt;&amp;#93;&lt;/span&gt; osc_import_event+0xfac/0x1420 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.718064&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a88592&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_invalidate_import+0x292/0x890 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.718151&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a8a702&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_invalidate_import_thread+0x42/0x2e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.718197&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810843f6&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
Nov 16 14:22:19 r2i1n4 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;274907.718205&amp;#93;&lt;/span&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8146f364&amp;gt;&amp;#93;&lt;/span&gt; kernel_thread_helper+0x4/0x10&lt;/p&gt;
</comment>
                            <comment id="133592" author="icostelloddn" created="Mon, 16 Nov 2015 11:19:49 +0000"  >&lt;p&gt;client messages&lt;/p&gt;</comment>
                            <comment id="133593" author="icostelloddn" created="Mon, 16 Nov 2015 11:20:01 +0000"  >&lt;p&gt;server message&lt;/p&gt;</comment>
                            <comment id="133594" author="icostelloddn" created="Mon, 16 Nov 2015 11:21:00 +0000"  >&lt;p&gt;uploaded 2 logs file detailing the problem, customer is running the ddn-ieel branch 2.5.39-ddn7.&lt;/p&gt;</comment>
                            <comment id="133595" author="icostelloddn" created="Mon, 16 Nov 2015 11:22:10 +0000"  >&lt;p&gt;In short, clients were evicted suddenly and the OSTs stayed INACTIVE for those clients until the clients were restarted forcibly. &lt;/p&gt;</comment>
                            <comment id="133596" author="icostelloddn" created="Mon, 16 Nov 2015 11:30:14 +0000"  >&lt;p&gt;NOTE: the 2.5.39-ddn7 has the patches:&lt;/p&gt;

&lt;p&gt;20401c7 &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; osc: faulty assertion in osc_object_prune()&lt;br/&gt;
b08341e &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; osc: further OSC cleanup after eviction&lt;br/&gt;
42e9257 &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6271&quot; title=&quot;(osc_cache.c:3150:discard_cb()) ASSERTION( (!(page-&amp;gt;cp_type == CPT_CACHEABLE) || (!PageDirty(cl_page_vmpage(page)))) ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6271&quot;&gt;&lt;del&gt;LU-6271&lt;/del&gt;&lt;/a&gt; osc: handle osc eviction correctly&lt;/p&gt;

&lt;p&gt;I think we should seriously consider using patch (written by Dongyang Li):&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/14915/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/14915/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;I am going to test the above patch at NCI, where we are able to reproduce the crash and inactive OST (due to client eviction)...&lt;/p&gt;</comment>
                            <comment id="133722" author="jay" created="Tue, 17 Nov 2015 17:43:57 +0000"  >&lt;p&gt;From the log, obviously the root cause was a deadlock problem. It has nothing to do with these patches.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Nov 16 14:22:19 r2i1n4 kernel: [274907.716536] shald_dpd.out   D ffff88201d872010     0   825    772 0x00000000
Nov 16 14:22:19 r2i1n4 kernel: [274907.716541]  ffff88201d873c30 0000000000000082 ffff88201d872010 0000000000010900
Nov 16 14:22:19 r2i1n4 kernel: [274907.716548]  0000000000010900 0000000000010900 0000000000010900 ffff88201d873fd8
Nov 16 14:22:19 r2i1n4 kernel: [274907.716554]  ffff88201d873fd8 0000000000010900 ffff881fbd5ac440 ffff881020932600
Nov 16 14:22:19 r2i1n4 kernel: [274907.716561] Call Trace:
Nov 16 14:22:19 r2i1n4 kernel: [274907.716568]  [&amp;lt;ffffffff814651a8&amp;gt;] __mutex_lock_slowpath+0xf8/0x150
Nov 16 14:22:19 r2i1n4 kernel: [274907.716575]  [&amp;lt;ffffffff81464c3a&amp;gt;] mutex_lock+0x1a/0x40
Nov 16 14:22:19 r2i1n4 kernel: [274907.716585]  [&amp;lt;ffffffff810fd10a&amp;gt;] generic_file_aio_write+0x3a/0xb0
Nov 16 14:22:19 r2i1n4 kernel: [274907.716622]  [&amp;lt;ffffffffa0d51e2e&amp;gt;] vvp_io_write_start+0x12e/0x530 [lustre]
Nov 16 14:22:19 r2i1n4 kernel: [274907.716707]  [&amp;lt;ffffffffa088d539&amp;gt;] cl_io_start+0x69/0x140 [obdclass]
Nov 16 14:22:19 r2i1n4 kernel: [274907.716790]  [&amp;lt;ffffffffa0890993&amp;gt;] cl_io_loop+0xa3/0x190 [obdclass]
Nov 16 14:22:19 r2i1n4 kernel: [274907.716860]  [&amp;lt;ffffffffa0cf0cc7&amp;gt;] ll_file_io_generic+0x757/0x830 [lustre]
Nov 16 14:22:19 r2i1n4 kernel: [274907.716892]  [&amp;lt;ffffffffa0d01e14&amp;gt;] ll_file_aio_write+0x1b4/0x5d0 [lustre]
Nov 16 14:22:19 r2i1n4 kernel: [274907.716926]  [&amp;lt;ffffffffa0d02415&amp;gt;] ll_file_write+0x1e5/0x270 [lustre]
Nov 16 14:22:19 r2i1n4 kernel: [274907.716942]  [&amp;lt;ffffffff8115ed4e&amp;gt;] vfs_write+0xce/0x140
Nov 16 14:22:19 r2i1n4 kernel: [274907.716951]  [&amp;lt;ffffffff8115eec3&amp;gt;] sys_write+0x53/0xa0
Nov 16 14:22:19 r2i1n4 kernel: [274907.716958]  [&amp;lt;ffffffff8146e1f2&amp;gt;] system_call_fastpath+0x16/0x1b
Nov 16 14:22:19 r2i1n4 kernel: [274907.716971]  [&amp;lt;00007fffec69579d&amp;gt;] 0x7fffec69579c
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Nov 16 14:22:19 r2i1n4 kernel: [274907.717055] shald_dpd.out   D ffff88201dc04010     0   826    772 0x00000000
Nov 16 14:22:19 r2i1n4 kernel: [274907.717060]  ffff88201dc05b70 0000000000000086 ffff88201dc04010 0000000000010900
Nov 16 14:22:19 r2i1n4 kernel: [274907.717067]  0000000000010900 0000000000010900 0000000000010900 ffff88201dc05fd8
Nov 16 14:22:19 r2i1n4 kernel: [274907.717073]  ffff88201dc05fd8 0000000000010900 ffff881fd7fce600 ffffffff81a11020
Nov 16 14:22:19 r2i1n4 kernel: [274907.717080] Call Trace:
Nov 16 14:22:19 r2i1n4 kernel: [274907.717087]  [&amp;lt;ffffffff814651a8&amp;gt;] __mutex_lock_slowpath+0xf8/0x150
Nov 16 14:22:19 r2i1n4 kernel: [274907.717094]  [&amp;lt;ffffffff81464c3a&amp;gt;] mutex_lock+0x1a/0x40
Nov 16 14:22:19 r2i1n4 kernel: [274907.717121]  [&amp;lt;ffffffffa0d1adb0&amp;gt;] ll_setattr_raw+0x2c0/0xb60 [lustre]
Nov 16 14:22:19 r2i1n4 kernel: [274907.717143]  [&amp;lt;ffffffff81178faf&amp;gt;] notify_change+0x19f/0x2f0
Nov 16 14:22:19 r2i1n4 kernel: [274907.717151]  [&amp;lt;ffffffff8115d517&amp;gt;] do_truncate+0x57/0x80
Nov 16 14:22:19 r2i1n4 kernel: [274907.717160]  [&amp;lt;ffffffff8116c053&amp;gt;] do_last+0x603/0x800
Nov 16 14:22:19 r2i1n4 kernel: [274907.717168]  [&amp;lt;ffffffff8116ceb9&amp;gt;] path_openat+0xd9/0x420
Nov 16 14:22:19 r2i1n4 kernel: [274907.717175]  [&amp;lt;ffffffff8116d33c&amp;gt;] do_filp_open+0x4c/0xc0
Nov 16 14:22:19 r2i1n4 kernel: [274907.717183]  [&amp;lt;ffffffff8115de5f&amp;gt;] do_sys_open+0x17f/0x250
Nov 16 14:22:19 r2i1n4 kernel: [274907.717191]  [&amp;lt;ffffffff8146e1f2&amp;gt;] system_call_fastpath+0x16/0x1b
Nov 16 14:22:19 r2i1n4 kernel: [274907.717202]  [&amp;lt;00007fffeb9057bd&amp;gt;] 0x7fffeb9057bc
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Nov 16 14:22:19 r2i1n4 kernel: [274907.717285] shald_dpd.out   D 0000000000000000     0   827    772 0x00000000
Nov 16 14:22:19 r2i1n4 kernel: [274907.717290]  ffff881fbd5afa60 0000000000000086 ffff881fbd5ae010 0000000000010900
Nov 16 14:22:19 r2i1n4 kernel: [274907.717297]  0000000000010900 0000000000010900 0000000000010900 ffff881fbd5affd8
Nov 16 14:22:19 r2i1n4 kernel: [274907.717303]  ffff881fbd5affd8 0000000000010900 ffff881fa07c20c0 ffff881020fa2080
Nov 16 14:22:19 r2i1n4 kernel: [274907.717310] Call Trace:
Nov 16 14:22:19 r2i1n4 kernel: [274907.717318]  [&amp;lt;ffffffff814661a5&amp;gt;] rwsem_down_failed_common+0xb5/0x160
Nov 16 14:22:19 r2i1n4 kernel: [274907.717329]  [&amp;lt;ffffffff81264533&amp;gt;] call_rwsem_down_write_failed+0x13/0x20
Nov 16 14:22:19 r2i1n4 kernel: [274907.717337]  [&amp;lt;ffffffff814654bc&amp;gt;] down_write+0x1c/0x20
Nov 16 14:22:19 r2i1n4 kernel: [274907.717370]  [&amp;lt;ffffffffa0d51159&amp;gt;] vvp_io_setattr_start+0x129/0x170 [lustre]
Nov 16 14:22:19 r2i1n4 kernel: [274907.717447]  [&amp;lt;ffffffffa088d539&amp;gt;] cl_io_start+0x69/0x140 [obdclass]
Nov 16 14:22:19 r2i1n4 kernel: [274907.717528]  [&amp;lt;ffffffffa0890993&amp;gt;] cl_io_loop+0xa3/0x190 [obdclass]
Nov 16 14:22:19 r2i1n4 kernel: [274907.717605]  [&amp;lt;ffffffffa0d4905f&amp;gt;] cl_setattr_ost+0x24f/0x2b0 [lustre]
Nov 16 14:22:19 r2i1n4 kernel: [274907.717663]  [&amp;lt;ffffffffa0d1b2e4&amp;gt;] ll_setattr_raw+0x7f4/0xb60 [lustre]
Nov 16 14:22:19 r2i1n4 kernel: [274907.717683]  [&amp;lt;ffffffff81178faf&amp;gt;] notify_change+0x19f/0x2f0
Nov 16 14:22:19 r2i1n4 kernel: [274907.717692]  [&amp;lt;ffffffff8115d517&amp;gt;] do_truncate+0x57/0x80
Nov 16 14:22:19 r2i1n4 kernel: [274907.717699]  [&amp;lt;ffffffff8116c053&amp;gt;] do_last+0x603/0x800
Nov 16 14:22:19 r2i1n4 kernel: [274907.717708]  [&amp;lt;ffffffff8116ceb9&amp;gt;] path_openat+0xd9/0x420
Nov 16 14:22:19 r2i1n4 kernel: [274907.717715]  [&amp;lt;ffffffff8116d33c&amp;gt;] do_filp_open+0x4c/0xc0
Nov 16 14:22:19 r2i1n4 kernel: [274907.717723]  [&amp;lt;ffffffff8115de5f&amp;gt;] do_sys_open+0x17f/0x250
Nov 16 14:22:19 r2i1n4 kernel: [274907.717731]  [&amp;lt;ffffffff8146e1f2&amp;gt;] system_call_fastpath+0x16/0x1b 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;You could try patch 14915 but most likely you&apos;re going to end up with the same problem.&lt;/p&gt;</comment>
                            <comment id="133729" author="jay" created="Tue, 17 Nov 2015 18:05:46 +0000"  >&lt;p&gt;From the backtrace, the truncate process was waiting for i_alloc_sem while holding inode mutex but nobody should have held i_alloc_sem from the stack. Unfortunately I don&apos;t have a sle11-sp3 kernel source code on hand otherwise I could take a look.&lt;/p&gt;

&lt;p&gt;One alternative way is to get a kernel dump when you see this problem and search the variable address of alloc_sem probably you can find it on some proc&apos;s stack.&lt;/p&gt;</comment>
                            <comment id="133747" author="pjones" created="Tue, 17 Nov 2015 19:48:49 +0000"  >&lt;p&gt;To nip this in the bud - it seems like any ongoing work should move to a new ticket...&lt;/p&gt;</comment>
                            <comment id="133752" author="jay" created="Tue, 17 Nov 2015 20:26:58 +0000"  >&lt;blockquote&gt;
&lt;p&gt;To nip this in the bud - it seems like any ongoing work should move to a new ticket...&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;To be clear, the customer was using a build with their own patches, there is no evidence that this issue can be seen on master.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10120">
                    <name>Blocker</name>
                                            <outwardlinks description="is blocking">
                                        <issuelink>
            <issuekey id="31067">LU-6843</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="31815">LU-7066</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="55319">LU-12142</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="18817" name="iwc189.lbug.log.txt.gz" size="239" author="di.wang" created="Mon, 31 Aug 2015 19:53:00 +0000"/>
                            <attachment id="17067" name="iwc260.log.gz" size="5041675" author="cliffw" created="Mon, 23 Feb 2015 17:48:52 +0000"/>
                            <attachment id="18518" name="l-23.LU-6271.txt.gz" size="233" author="cliffw" created="Wed, 29 Jul 2015 17:55:00 +0000"/>
                            <attachment id="19043" name="lustre-log.iwc37.txt" size="337069" author="cliffw" created="Thu, 1 Oct 2015 17:43:04 +0000"/>
                            <attachment id="19616" name="r2i1n4.messages.gz" size="98417" author="icostelloddn" created="Mon, 16 Nov 2015 11:19:49 +0000"/>
                            <attachment id="19617" name="server_messages.tar.gz" size="946075" author="icostelloddn" created="Mon, 16 Nov 2015 11:20:01 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzx6vj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>17581</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>