<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:15:25 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1299] running truncated executable causes spewing of lock debug messages</title>
                <link>https://jira.whamcloud.com/browse/LU-1299</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;A user was running an executable that appeared to be truncated.  This caused the node to OOM and the Lustre client subsequently started spewing lock debugging messages:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2012-04-10 09:32:58 LustreError: 26191:0:(cl_lock.c:1426:cl_unuse_locked()) unuse return -4
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked()) lock@ffff8805b8374c18[3 5 -4 1 0 00000006] P(0):[0, 18446744073709551615]@[0x22c36cd0b:0x1:0x0] {
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked())     vvp@ffff88058a495790: 
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked())     lov@ffff8803e7fd0e98: 2
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked())     0 0: lock@ffff880227a9ce78[1 5 0 1 1 00000000] R(1):[0, 18446744073709551615]@[0x1003d0000:0x91707b:0x0] {
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked())     lovsub@ffff8801f0d30c60: [0 ffff8803e7fd03a0 R(1):[0, 18446744073709551615]@[0x22c36cd0b:0x1:0x0]] [0 ffff8803e7fd0e98 P(0):[0, 18446744073709551615]@[0x22c36cd0b:0x1:0x0]] [0 ffff8803e14c1f68 P(0):[0, 18446744073709551615]@[0x22c36cd0b:0x1:0x0]] 
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked())     osc@ffff8801e0ae7648: ffff88011fe84900 00101001 0x40e1ce45be5b325f 4 (null) size: 1048576 mtime: 1333818831 atime: 1334070919 ctime: 1333818831 blocks: 2048
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked()) } lock@ffff880227a9ce78
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked())     1 0: lock@ffff880227a9cd98[2 3 0 2 2 00000000] R(1):[0, 18446744073709551615]@[0x101d10000:0x93426:0x0] {
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked())     lovsub@ffff8801f0d30c20: [0 ffff8803e7fd00c8 R(1):[256, 18446744073709551615]@[0x22c36cd0b:0x1:0x0]] [1 ffff8803e7fd0e98 P(0):[0, 18446744073709551615]@[0x22c36cd0b:0x1:0x0]] [1 ffff8803e14c1f68 P(0):[0, 18446744073709551615]@[0x22c36cd0b:0x1:0x0]] 
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked())     osc@ffff8801e0ae7590: ffff88011fe84240 00101001 0x40e1ce45be5b3266 3 ffff8803364e9e48 size: 0 mtime: 1333818824 atime: 1333818666 ctime: 1333818824 blocks: 0
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked()) } lock@ffff880227a9cd98
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked()) 
2012-04-10 09:32:58 LustreError: 24081:0:(cl_lock.c:1426:cl_unuse_locked()) } lock@ffff8805b8374c18
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;These messages appear many times a second.  The processes seem to be spinning in the page fault handler.  If you catch it while it&apos;s not active the backtrace looks like this:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;PID: 22135  TASK: ffff880222c0e080  CPU: 1   COMMAND: &quot;test&quot;
 #0 [ffff880336539158] schedule at ffffffff814eeee0
 #1 [ffff880336539220] __mutex_lock_slowpath at ffffffff814f057e
 #2 [ffff880336539290] mutex_lock at ffffffff814f041b
 #3 [ffff8803365392b0] cl_lock_mutex_get at ffffffffa05d4d45 [obdclass]
 #4 [ffff8803365392f0] cl_lock_enclosure at ffffffffa05d789f [obdclass]
 #5 [ffff880336539330] cl_lock_closure_build at ffffffffa05d7963 [obdclass]
 #6 [ffff880336539380] lovsub_lock_closure at ffffffffa095c19e [lov]
 #7 [ffff8803365393c0] cl_lock_closure_build at ffffffffa05d79a0 [obdclass]
 #8 [ffff880336539410] lov_sublock_lock at ffffffffa095485b [lov]
 #9 [ffff880336539470] lov_lock_enqueue at ffffffffa0955f02 [lov]
#10 [ffff880336539500] cl_enqueue_try at ffffffffa05d66cc [obdclass]
#11 [ffff880336539550] cl_enqueue_locked at ffffffffa05d7ced [obdclass]
#12 [ffff880336539590] cl_lock_request at ffffffffa05d7fae [obdclass]
#13 [ffff8803365395f0] cl_glimpse_lock at ffffffffa0a0e5fb [lustre]
#14 [ffff880336539650] ccc_prep_size at ffffffffa0a11b8a [lustre]
#15 [ffff8803365396b0] vvp_io_fault_start at ffffffffa0a165db [lustre]
#16 [ffff880336539720] cl_io_start at ffffffffa05d8a5a [obdclass]
#17 [ffff880336539760] cl_io_loop at ffffffffa05dc094 [obdclass]
#18 [ffff8803365397b0] ll_fault0 at ffffffffa0a00164 [lustre]
#19 [ffff880336539810] ll_fault at ffffffffa0a00428 [lustre]
#20 [ffff880336539860] __do_fault at ffffffff8113bad4
#21 [ffff8803365398f0] handle_pte_fault at ffffffff8113c087
#22 [ffff8803365399d0] handle_mm_fault at ffffffff8113ccc4
#23 [ffff880336539a20] __do_page_fault at ffffffff81042b79
#24 [ffff880336539b40] do_page_fault at ffffffff814f495e
#25 [ffff880336539b70] page_fault at ffffffff814f1d15
    [exception RIP: __clear_user+63]
    RIP: ffffffff812791df  RSP: ffff880336539c28  RFLAGS: 00010202
    RAX: 0000000000000000  RBX: 0000000000df44a8  RCX: 000000000000016b
    RDX: 0000000000000000  RSI: 0000000000000b58  RDI: 0000000000df44a8
    RBP: ffff880336539c38   R8: 0000000000000000   R9: 0000000000000008
    R10: ffff8801a44c7318  R11: 0000000000000000  R12: 0000000000000b58
    R13: 0000000000000000  R14: ffff880336539f58  R15: 0000000000df44a8
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
#26 [ffff880336539c40] clear_user at ffffffff81279248
#27 [ffff880336539c60] padzero at ffffffff811cc93d
#28 [ffff880336539c70] load_elf_binary at ffffffff811ce96e
#29 [ffff880336539e40] search_binary_handler at ffffffff8117f5eb
#30 [ffff880336539eb0] do_execve at ffffffff81180779
#31 [ffff880336539f20] sys_execve at ffffffff810095ea
#32 [ffff880336539f50] stub_execve at ffffffff8100b54a
    RIP: 0000003d6f6ab4a7  RSP: 00007fffffffa628  RFLAGS: 00000203
    RAX: 000000000000003b  RBX: 00002aaaaaae05a8  RCX: ffffffffffffffff
    RDX: 0000000000868c08  RSI: 000000000088afc8  RDI: 0000000000869808
    RBP: 000000000088ab08   R8: 0000000000000000   R9: 00007fffffffa598
    R10: 00007fffffffa54c  R11: 0000000000000203  R12: 0000000000869808
    R13: 000000000087c628  R14: 000000000088afc8  R15: 000000000087ce48
    ORIG_RAX: 000000000000003b  CS: 0033  SS: 002b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;or this:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;PID: 22135  TASK: ffff880222c0e080  CPU: 8   COMMAND: &quot;test&quot;
 #0 [ffff880336539118] schedule at ffffffff814eeee0
 #1 [ffff8803365391e0] __mutex_lock_slowpath at ffffffff814f057e
 #2 [ffff880336539250] mutex_lock at ffffffff814f041b
 #3 [ffff880336539270] cl_lock_mutex_get at ffffffffa05d4d45 [obdclass]
 #4 [ffff8803365392b0] cl_lock_enclosure at ffffffffa05d789f [obdclass]
 #5 [ffff8803365392f0] cl_lock_closure_build at ffffffffa05d7963 [obdclass]
 #6 [ffff880336539340] lovsub_lock_closure at ffffffffa095c19e [lov]
 #7 [ffff880336539380] cl_lock_closure_build at ffffffffa05d79a0 [obdclass]
 #8 [ffff8803365393d0] lov_sublock_lock at ffffffffa095485b [lov]
 #9 [ffff880336539430] lov_lock_delete at ffffffffa0954b43 [lov]
#10 [ffff880336539490] cl_lock_delete0 at ffffffffa05d4425 [obdclass]
#11 [ffff8803365394c0] cl_lock_delete at ffffffffa05d4713 [obdclass]
#12 [ffff8803365394e0] cl_lock_finish at ffffffffa05d68c7 [obdclass]
#13 [ffff880336539500] cl_lock_hold_mutex at ffffffffa05d6cd2 [obdclass]
#14 [ffff880336539590] cl_lock_request at ffffffffa05d7f8e [obdclass]
#15 [ffff8803365395f0] cl_glimpse_lock at ffffffffa0a0e5fb [lustre]
#16 [ffff880336539650] ccc_prep_size at ffffffffa0a11b8a [lustre]
#17 [ffff8803365396b0] vvp_io_fault_start at ffffffffa0a165db [lustre]
#18 [ffff880336539720] cl_io_start at ffffffffa05d8a5a [obdclass]
#19 [ffff880336539760] cl_io_loop at ffffffffa05dc094 [obdclass]
#20 [ffff8803365397b0] ll_fault0 at ffffffffa0a00164 [lustre]
#21 [ffff880336539810] ll_fault at ffffffffa0a00428 [lustre]
#22 [ffff880336539860] __do_fault at ffffffff8113bad4
#23 [ffff8803365398f0] handle_pte_fault at ffffffff8113c087
#24 [ffff8803365399d0] handle_mm_fault at ffffffff8113ccc4
#25 [ffff880336539a20] __do_page_fault at ffffffff81042b79
#26 [ffff880336539b40] do_page_fault at ffffffff814f495e
#27 [ffff880336539b70] page_fault at ffffffff814f1d15
    [exception RIP: __clear_user+63]
    RIP: ffffffff812791df  RSP: ffff880336539c28  RFLAGS: 00010202
    RAX: 0000000000000000  RBX: 0000000000df44a8  RCX: 000000000000016b
    RDX: 0000000000000000  RSI: 0000000000000b58  RDI: 0000000000df44a8
    RBP: ffff880336539c38   R8: 0000000000000000   R9: 0000000000000008
    R10: ffff8801a44c7318  R11: 0000000000000000  R12: 0000000000000b58
    R13: 0000000000000000  R14: ffff880336539f58  R15: 0000000000df44a8
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
#28 [ffff880336539c40] clear_user at ffffffff81279248
#29 [ffff880336539c60] padzero at ffffffff811cc93d
#30 [ffff880336539c70] load_elf_binary at ffffffff811ce96e
#31 [ffff880336539e40] search_binary_handler at ffffffff8117f5eb
#32 [ffff880336539eb0] do_execve at ffffffff81180779
#33 [ffff880336539f20] sys_execve at ffffffff810095ea
#34 [ffff880336539f50] stub_execve at ffffffff8100b54a
    RIP: 0000003d6f6ab4a7  RSP: 00007fffffffa628  RFLAGS: 00000203
    RAX: 000000000000003b  RBX: 00002aaaaaae05a8  RCX: ffffffffffffffff
    RDX: 0000000000868c08  RSI: 000000000088afc8  RDI: 0000000000869808
    RBP: 000000000088ab08   R8: 0000000000000000   R9: 00007fffffffa598
    R10: 00007fffffffa54c  R11: 0000000000000203  R12: 0000000000869808
    R13: 000000000087c628  R14: 000000000088afc8  R15: 000000000087ce48
    ORIG_RAX: 000000000000003b  CS: 0033  SS: 002b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Evidence of corrupted executable:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# ansel6 /root &amp;gt; objdump -x &amp;lt;path_to_exe&amp;gt;
objdump: &amp;lt;path_to_exe&amp;gt;: File truncated
# ansel6 /root &amp;gt; ls -l &amp;lt;path_to_exe&amp;gt;
-rwxrwx--- 1 &amp;lt;user&amp;gt; &amp;lt;group&amp;gt; 1048576 Apr  7 10:13 &amp;lt;path_to_exe&amp;gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="13929">LU-1299</key>
            <summary>running truncated executable causes spewing of lock debug messages</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="jay">Jinshan Xiong</assignee>
                                    <reporter username="nedbass">Ned Bass</reporter>
                        <labels>
                    </labels>
                <created>Tue, 10 Apr 2012 15:56:38 +0000</created>
                <updated>Wed, 8 May 2013 02:38:36 +0000</updated>
                            <resolved>Wed, 8 May 2013 02:38:36 +0000</resolved>
                                    <version>Lustre 2.1.1</version>
                                    <fixVersion>Lustre 2.3.0</fixVersion>
                    <fixVersion>Lustre 2.1.4</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="34484" author="jay" created="Tue, 10 Apr 2012 20:09:36 +0000"  >&lt;p&gt;Hi Ned, I guess you are running &lt;a href=&quot;https://github.com/chaos/lustre.git&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/chaos/lustre.git&lt;/a&gt;. Can you please tell the last hash # of your running version because I fixed this kind of problem recently?&lt;/p&gt;</comment>
                            <comment id="34485" author="nedbass" created="Tue, 10 Apr 2012 20:27:13 +0000"  >&lt;p&gt;Server:&lt;br/&gt;
&lt;a href=&quot;https://github.com/chaos/lustre/commits/2.1.0-24chaos&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/chaos/lustre/commits/2.1.0-24chaos&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Client:&lt;br/&gt;
&lt;a href=&quot;https://github.com/chaos/lustre/commits/2.1.1-3chaos&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/chaos/lustre/commits/2.1.1-3chaos&lt;/a&gt;&lt;/p&gt;
</comment>
                            <comment id="34486" author="jay" created="Tue, 10 Apr 2012 20:48:16 +0000"  >&lt;p&gt;Can you please apply patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1163&quot; title=&quot;Never try to invalidate a dirty page&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1163&quot;&gt;&lt;del&gt;LU-1163&lt;/del&gt;&lt;/a&gt; with hash fdeace7e542360989645ea3dd9954a5f28c681aa, I think this may help.&lt;/p&gt;</comment>
                            <comment id="34490" author="jay" created="Tue, 10 Apr 2012 21:44:50 +0000"  >&lt;p&gt;After taking a closer look, I realize this is a different problem. For some reason cl_unuse returned -EIO. Can you please take a look at the log and find this error code?&lt;/p&gt;</comment>
                            <comment id="34521" author="pjones" created="Wed, 11 Apr 2012 10:04:23 +0000"  >&lt;p&gt;Yangsheng&lt;/p&gt;

&lt;p&gt;Could you please look into this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="34735" author="nedbass" created="Fri, 13 Apr 2012 13:59:49 +0000"  >&lt;p&gt;Jinshan,&lt;/p&gt;

&lt;p&gt;I think you mean -EINTR?  We had a repeat today (user running same broken executable) which gave me the chance to grab a debug log.  Uploading now.&lt;/p&gt;</comment>
                            <comment id="34736" author="nedbass" created="Fri, 13 Apr 2012 14:02:20 +0000"  >&lt;p&gt;See &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1299&quot; title=&quot;running truncated executable causes spewing of lock debug messages&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1299&quot;&gt;&lt;del&gt;LU-1299&lt;/del&gt;&lt;/a&gt;.lustre.log.gz on ftp.whamcloud.com&lt;/p&gt;</comment>
                            <comment id="35053" author="jay" created="Wed, 18 Apr 2012 17:18:50 +0000"  >&lt;p&gt;Please try patch &lt;a href=&quot;http://review.whamcloud.com/2574&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/2574&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="35130" author="morrone" created="Thu, 19 Apr 2012 17:20:25 +0000"  >&lt;p&gt;Thanks, we are trying that patch.&lt;/p&gt;</comment>
                            <comment id="35244" author="nedbass" created="Fri, 20 Apr 2012 13:31:53 +0000"  >&lt;p&gt;I ran some quick tests with this patch applied and I still get the OOM behavior.  I do not get the lock spewing behavior, but that could just be because I&apos;m not running it the same way as the original user.  Perhaps we should create a separate issue for the OOMs.&lt;/p&gt;

&lt;p&gt;If I run ldd on the corrupt executable in Lustre, the node OOMs with the following backtrace.  Running ldd from a non-Lustre filesystem returns &quot;ldd: exited with unknown exit code (135)&quot;.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2012-04-20 10:07:40 Pid: 21585, comm: ld-linux-x86-64 Not tainted 2.6.32-220.13.1.2chaos.ch5.x86_64 #1
2012-04-20 10:07:40 Call Trace:
2012-04-20 10:07:40  [&amp;lt;ffffffff810c2de1&amp;gt;] ? cpuset_print_task_mems_allowed+0x91/0xb0
2012-04-20 10:07:40  [&amp;lt;ffffffff81113b40&amp;gt;] ? dump_header+0x90/0x1b0
2012-04-20 10:07:40  [&amp;lt;ffffffff8107c4ac&amp;gt;] ? lock_timer_base+0x3c/0x70
2012-04-20 10:07:40  [&amp;lt;ffffffff8120f01c&amp;gt;] ? security_real_capable_noaudit+0x3c/0x70
2012-04-20 10:07:40  [&amp;lt;ffffffff81113fca&amp;gt;] ? oom_kill_process+0x8a/0x2c0
2012-04-20 10:07:40  [&amp;lt;ffffffff81113f01&amp;gt;] ? select_bad_process+0xe1/0x120
2012-04-20 10:07:40  [&amp;lt;ffffffff81114420&amp;gt;] ? out_of_memory+0x220/0x3c0
2012-04-20 10:07:40  [&amp;lt;ffffffff81114685&amp;gt;] ? pagefault_out_of_memory+0xc5/0x110
2012-04-20 10:07:40  [&amp;lt;ffffffff810427be&amp;gt;] ? mm_fault_error+0x4e/0x100
2012-04-20 10:07:40  [&amp;lt;ffffffff81042d76&amp;gt;] ? __do_page_fault+0x336/0x480
2012-04-20 10:07:40  [&amp;lt;ffffffff8114273a&amp;gt;] ? do_mmap_pgoff+0x33a/0x380
2012-04-20 10:07:40  [&amp;lt;ffffffff814f481e&amp;gt;] ? do_page_fault+0x3e/0xa0
2012-04-20 10:07:40  [&amp;lt;ffffffff814f1bd5&amp;gt;] ? page_fault+0x25/0x30
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;If I try to run the corrupt from within Lustre, the node OOMs with the following backtrace.  Running the program from outside Lustre produces a segmentation fault. &lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2012-04-20 10:16:42 Pid: 22336, comm: test Not tainted 2.6.32-220.13.1.2chaos.ch5.x86_64 #1
2012-04-20 10:16:42 Call Trace:
2012-04-20 10:16:42  [&amp;lt;ffffffff810c2de1&amp;gt;] ? cpuset_print_task_mems_allowed+0x91/0xb0
2012-04-20 10:16:42  [&amp;lt;ffffffff81113b40&amp;gt;] ? dump_header+0x90/0x1b0
2012-04-20 10:16:42  [&amp;lt;ffffffff8107c4ac&amp;gt;] ? lock_timer_base+0x3c/0x70
2012-04-20 10:16:42  [&amp;lt;ffffffff8120f01c&amp;gt;] ? security_real_capable_noaudit+0x3c/0x70
2012-04-20 10:16:42  [&amp;lt;ffffffff81113fca&amp;gt;] ? oom_kill_process+0x8a/0x2c0
2012-04-20 10:16:42  [&amp;lt;ffffffff81113f01&amp;gt;] ? select_bad_process+0xe1/0x120
2012-04-20 10:16:42  [&amp;lt;ffffffff81114420&amp;gt;] ? out_of_memory+0x220/0x3c0
2012-04-20 10:16:42  [&amp;lt;ffffffff81114685&amp;gt;] ? pagefault_out_of_memory+0xc5/0x110
2012-04-20 10:16:42  [&amp;lt;ffffffff810427be&amp;gt;] ? mm_fault_error+0x4e/0x100
2012-04-20 10:16:42  [&amp;lt;ffffffff81042d76&amp;gt;] ? __do_page_fault+0x336/0x480
2012-04-20 10:16:42  [&amp;lt;ffffffff81131db0&amp;gt;] ? vma_prio_tree_insert+0x30/0x50
2012-04-20 10:16:42  [&amp;lt;ffffffff8113f2ec&amp;gt;] ? __vma_link_file+0x4c/0x80
2012-04-20 10:16:42  [&amp;lt;ffffffff8113facb&amp;gt;] ? vma_link+0x9b/0xf0
2012-04-20 10:16:42  [&amp;lt;ffffffff811420d9&amp;gt;] ? mmap_region+0x269/0x590
2012-04-20 10:16:42  [&amp;lt;ffffffff814f481e&amp;gt;] ? do_page_fault+0x3e/0xa0
2012-04-20 10:16:42  [&amp;lt;ffffffff814f1bd5&amp;gt;] ? page_fault+0x25/0x30
2012-04-20 10:16:42  [&amp;lt;ffffffff81278d2f&amp;gt;] ? __clear_user+0x3f/0x70
2012-04-20 10:16:42  [&amp;lt;ffffffff81278d11&amp;gt;] ? __clear_user+0x21/0x70
2012-04-20 10:16:42  [&amp;lt;ffffffff81278d98&amp;gt;] ? clear_user+0x38/0x40
2012-04-20 10:16:42  [&amp;lt;ffffffff811cc3fd&amp;gt;] ? padzero+0x2d/0x40
2012-04-20 10:16:42  [&amp;lt;ffffffff811ce42e&amp;gt;] ? load_elf_binary+0x88e/0x1b10
2012-04-20 10:16:42  [&amp;lt;ffffffff8113752f&amp;gt;] ? follow_page+0x31f/0x470
2012-04-20 10:16:42  [&amp;lt;ffffffff8113c830&amp;gt;] ? __get_user_pages+0x110/0x430
2012-04-20 10:16:42  [&amp;lt;ffffffff8117f06b&amp;gt;] ? search_binary_handler+0x11b/0x360
2012-04-20 10:16:42  [&amp;lt;ffffffff811801f9&amp;gt;] ? do_execve+0x239/0x340
2012-04-20 10:16:42  [&amp;lt;ffffffff81278e4a&amp;gt;] ? strncpy_from_user+0x4a/0x90
2012-04-20 10:16:42  [&amp;lt;ffffffff810095ea&amp;gt;] ? sys_execve+0x4a/0x80
2012-04-20 10:16:42  [&amp;lt;ffffffff8100b54a&amp;gt;] ? stub_execve+0x6a/0xc0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="35245" author="nedbass" created="Fri, 20 Apr 2012 13:34:32 +0000"  >&lt;p&gt;Uploading corrupt program. I was able to reproduce the OOM in a virtual machine by running ldd on it from within Lustre.&lt;/p&gt;</comment>
                            <comment id="35272" author="jay" created="Fri, 20 Apr 2012 16:48:06 +0000"  >&lt;p&gt;Hi Ned, please let me know after you have done uploading, I tried ldd but couldn&apos;t see the problem on my side. Also, can you please reproduce it with debug turning on and tell me which error code is returned by ll_fault0?&lt;/p&gt;</comment>
                            <comment id="35274" author="nedbass" created="Fri, 20 Apr 2012 16:52:07 +0000"  >&lt;p&gt;Hi Jinshan, the upload is done.  The file md5sum is b2babed0d72bfdbcf49c875c0c6d9d11.  I will follow up with the ll_fault0 error code.&lt;/p&gt;</comment>
                            <comment id="35280" author="jay" created="Fri, 20 Apr 2012 17:14:33 +0000"  >&lt;p&gt;what&apos;s the file name? The only file with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1299&quot; title=&quot;running truncated executable causes spewing of lock debug messages&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1299&quot;&gt;&lt;del&gt;LU-1299&lt;/del&gt;&lt;/a&gt; prefix is the log you uploaded last time.&lt;/p&gt;</comment>
                            <comment id="35284" author="nedbass" created="Fri, 20 Apr 2012 17:36:44 +0000"  >&lt;p&gt;Hi Jinshan, sorry I was unclear.  I attached the corrupt program to this issue with filename &quot;test&quot;.&lt;/p&gt;</comment>
                            <comment id="35308" author="jay" created="Fri, 20 Apr 2012 19:04:39 +0000"  >&lt;p&gt;Ned, thanks for the program, I can reproduce it on my machine. It hit this error: vvp_io.c:668:vvp_io_kernel_fault()) got addr 0000000000df4000 - SIGBUS&lt;/p&gt;

&lt;p&gt;This is interesting because error was returned by kenrel, I&apos;ll take a look at this.&lt;/p&gt;</comment>
                            <comment id="35310" author="nedbass" created="Fri, 20 Apr 2012 19:18:00 +0000"  >&lt;p&gt;I can reproduce this with a truncated copy of /bin/bash, i.e.&lt;/p&gt;

&lt;p&gt;% cp /bin/bash /mnt/lustre&lt;br/&gt;
% truncate -s 512K /mnt/lustre/bash&lt;br/&gt;
% /mnt/lustre/bash&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;OOM&amp;#93;&lt;/span&gt;&lt;/p&gt;</comment>
                            <comment id="35376" author="jay" created="Mon, 23 Apr 2012 14:05:17 +0000"  >&lt;p&gt;please try patch set 2 of &lt;a href=&quot;http://review.whamcloud.com/2574&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/2574&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="35410" author="nedbass" created="Tue, 24 Apr 2012 11:18:09 +0000"  >&lt;p&gt;Thanks, we&apos;ll give it a try soon.&lt;/p&gt;

&lt;p&gt;Do you think we should add a regression test since we have a simple reproducer?  I&apos;d be happy to add it to the patch.&lt;/p&gt;</comment>
                            <comment id="35416" author="jay" created="Tue, 24 Apr 2012 14:24:58 +0000"  >&lt;p&gt;Good point. We should add it into sanity test.&lt;/p&gt;</comment>
                            <comment id="35534" author="nedbass" created="Thu, 26 Apr 2012 19:06:54 +0000"  >&lt;p&gt;I can no longer reproduce the problem with your patch applied.&lt;/p&gt;</comment>
                            <comment id="35570" author="nedbass" created="Fri, 27 Apr 2012 14:26:55 +0000"  >&lt;p&gt;I pushed patch set 3 with a regression test in sanity.sh.&lt;/p&gt;</comment>
                            <comment id="38173" author="jay" created="Fri, 4 May 2012 12:48:28 +0000"  >&lt;p&gt;Hi Ned, I made a combo patch (patch 2574 set 4) to merge fixes in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1355&quot; title=&quot;LBUG in lovsub_lock_delete_one -- Impossible state: 2 (CLS_ENQUEUED)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1355&quot;&gt;&lt;del&gt;LU-1355&lt;/del&gt;&lt;/a&gt; and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1356&quot; title=&quot;Assertion triggered in osc_lock_wait()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1356&quot;&gt;&lt;del&gt;LU-1356&lt;/del&gt;&lt;/a&gt;. Also it includes other fixes when I found during test. Please give it a try.&lt;/p&gt;</comment>
                            <comment id="38175" author="nedbass" created="Fri, 4 May 2012 12:54:55 +0000"  >&lt;p&gt;Will do, thanks.&lt;/p&gt;</comment>
                            <comment id="38249" author="jay" created="Mon, 7 May 2012 14:37:19 +0000"  >&lt;p&gt;I pushed patch set 5 to fix a problem when running on 2.6.18 kernels.&lt;/p&gt;</comment>
                            <comment id="38277" author="morrone" created="Mon, 7 May 2012 20:35:12 +0000"  >&lt;p&gt;I see patch set 6 now too.  We have patch set 4 installed and under testing.  We can try patch set 6 tomorrow if you like.&lt;/p&gt;</comment>
                            <comment id="38278" author="jay" created="Mon, 7 May 2012 21:05:08 +0000"  >&lt;p&gt;there is a bug in ll_nopage() so patch set 6 fixed it. Please give it a try, thanks.&lt;/p&gt;</comment>
                            <comment id="38350" author="morrone" created="Tue, 8 May 2012 18:17:22 +0000"  >&lt;p&gt;Just FYI, we hit the following with patchset 4 installed.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2012-05-07 22:22:04 LustreError: 11-0: lc2-OST0016-osc-ffff8808339c4400: Communicating with 10.1.1.49@o2ib9, operation ost_punch failed with -107.
2012-05-07 22:22:04 Lustre: lc2-OST0016-osc-ffff8808339c4400: Connection to lc2-OST0016 (at 10.1.1.49@o2ib9) was lost; in progress operations using this service will wait for recovery to complete
2012-05-07 22:22:04 LustreError: 167-0: This client was evicted by lc2-OST0016; in progress operations using this service will fail.
2012-05-07 22:22:04 LustreError: 17200:0:(osc_lock.c:816:osc_ldlm_completion_ast()) lock@ffff880434dade78[2 3 0 1 1 00000000] W(2):[0, 18446744073709551615]@[0x100160000:0x202101a:0x0] {
2012-05-07 22:22:04 LustreError: 17200:0:(osc_lock.c:816:osc_ldlm_completion_ast())     lovsub@ffff88041af9bfa0: [0 ffff88041f667af0 W(2):[0, 18446744073709551615]@[0x2002b80d0:0x111:0x0]] 
2012-05-07 22:22:04 LustreError: 17200:0:(osc_lock.c:816:osc_ldlm_completion_ast())     osc@ffff880432a19ee8: ffff880430c50900 80120002 0x9a383156624505ed 3 ffff8806d7fec8c0 size: 0 mtime: 1336454424 atime: 1336454357 ctime: 1336454424 blocks: 0
2012-05-07 22:22:04 LustreError: 17200:0:(osc_lock.c:816:osc_ldlm_completion_ast()) } lock@ffff880434dade78
2012-05-07 22:22:04 LustreError: 17200:0:(osc_lock.c:816:osc_ldlm_completion_ast()) dlmlock returned -5
2012-05-07 22:22:04 LustreError: 17200:0:(ldlm_resource.c:755:ldlm_resource_complain()) Namespace lc2-OST0016-osc-ffff8808339c4400 resource refcount nonzero (1) after lock cleanup; forcing cleanup.
2012-05-07 22:22:05 LustreError: 17200:0:(ldlm_resource.c:761:ldlm_resource_complain()) Resource: ffff880833b8c200 (33689626/0/0/0) (rc: 1)
2012-05-07 22:22:05 Lustre: lc2-OST0016-osc-ffff8808339c4400: Connection restored to lc2-OST0016 (at 10.1.1.49@o2ib9)
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1412:cl_unuse_try()) result = -5, this is unlikely!
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked()) lock@ffff8804311aee78[3 3 0 1 0 00000000] W(2):[0, 18446744073709551615]@[0x2002b80d0:0x111:0x0] {
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked())     vvp@ffff8804337a3b28: 
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked())     lov@ffff88041f667af0: 2
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked())     0 0: ---
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked())     1 0: lock@ffff880434dadd98[2 5 0 0 0 00000001] W(2):[0, 18446744073709551615]@[0x100190000:0x1ff4db5:0x0] {
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked())     lovsub@ffff88041af9bf60: [1 ffff88041f667af0 W(2):[0, 18446744073709551615]@[0x2002b80d0:0x111:0x0]] 
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked())     osc@ffff880432a19e30: ffff880430c50b40 80120002 0x9a383156624505f4 5 (null) size: 0 mtime: 1336454424 atime: 1336454406 ctime: 1336454424 blocks: 0
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked()) } lock@ffff880434dadd98
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked()) 
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked()) } lock@ffff8804311aee78
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:1426:cl_unuse_locked()) unuse return -5
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release()) lock@ffff8804311aee78[3 3 0 0 0 00000000] W(2):[0, 18446744073709551615]@[0x2002b80d0:0x111:0x0] {
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release())     vvp@ffff8804337a3b28: 
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release())     lov@ffff88041f667af0: 2
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release())     0 0: ---
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release())     1 0: lock@ffff880434dadd98[2 5 0 0 0 00000001] W(2):[0, 18446744073709551615]@[0x100190000:0x1ff4db5:0x0] {
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release())     lovsub@ffff88041af9bf60: [1 ffff88041f667af0 W(2):[0, 18446744073709551615]@[0x2002b80d0:0x111:0x0]] 
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release())     osc@ffff880432a19e30: ffff880430c50b40 80120002 0x9a383156624505f4 5 (null) size: 0 mtime: 1336454424 atime: 1336454406 ctime: 1336454424 blocks: 0
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release()) } lock@ffff880434dadd98
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release()) 
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release()) } lock@ffff8804311aee78
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release()) assertion failed.
2012-05-07 22:22:07 LustreError: 17164:0:(cl_lock.c:906:cl_lock_hold_release()) ASSERTION(lock-&amp;gt;cll_state != CLS_HELD) failed
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We are switching to patchset 6.&lt;/p&gt;</comment>
                            <comment id="38423" author="jay" created="Wed, 9 May 2012 13:31:59 +0000"  >&lt;p&gt;In patch set 7, I fixed the problem you mentioned above. Please give it a try.&lt;/p&gt;</comment>
                            <comment id="38586" author="morrone" created="Thu, 10 May 2012 18:42:17 +0000"  >&lt;p&gt;We&apos;re testing patch set 7.  The test cluster is giving us trouble for non-lustre reasons, so nothing conclusive to report yet.&lt;/p&gt;</comment>
                            <comment id="38726" author="pjones" created="Mon, 14 May 2012 10:11:30 +0000"  >&lt;p&gt;Any news yet?&lt;/p&gt;</comment>
                            <comment id="38745" author="morrone" created="Mon, 14 May 2012 13:31:17 +0000"  >&lt;p&gt;Unfortunately no.  That non-lustre problem hit again.  We&apos;re adjusting the test cluster to work around that problem.&lt;/p&gt;</comment>
                            <comment id="38873" author="morrone" created="Tue, 15 May 2012 18:35:43 +0000"  >&lt;p&gt;It looks like we made it through a day of testing without any obvious cl_lock messages.&lt;/p&gt;</comment>
                            <comment id="39024" author="spitzcor" created="Thu, 17 May 2012 17:04:24 +0000"  >&lt;p&gt;Jinshan referred to change #7574 before, but that is for b2_1.&lt;br/&gt;
FYI:&lt;br/&gt;
master - &lt;a href=&quot;http://review.whamcloud.com/#change,2654&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,2654&lt;/a&gt;&lt;br/&gt;
b2_1 -   &lt;a href=&quot;http://review.whamcloud.com/#change,2574&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,2574&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="39240" author="jay" created="Tue, 22 May 2012 17:15:04 +0000"  >&lt;p&gt;FYI: A new patch is submitted which allows the process to be killed while waiting for lock state.&lt;/p&gt;</comment>
                            <comment id="39936" author="spitzcor" created="Mon, 4 Jun 2012 13:50:15 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#change,2654&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,2654&lt;/a&gt; is marked &quot;Merged&quot; to master today, but the commits haven&apos;t been posted to this ticket (yet?).&lt;/p&gt;</comment>
                            <comment id="39945" author="pjones" created="Mon, 4 Jun 2012 15:05:35 +0000"  >&lt;p&gt;Cory&lt;/p&gt;

&lt;p&gt;Yes, this patch has landed to master and the ticket will close when the equivalent fix lands to b2_1 also. The commit updates back to JIRA seem to have been turned off since the lab relocation.&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="39962" author="morrone" created="Mon, 4 Jun 2012 19:19:24 +0000"  >&lt;p&gt;We have never had automated messages in jira about what happens in gerrit.  It would be great to add that some day.  Updates in jira tickets about what goes on in gerrit are completely manual.&lt;/p&gt;

&lt;p&gt;What we used to have was the Jenkins &quot;Build Master&quot; messages that were horrible.  To those I say &quot;good riddance&quot;.&lt;/p&gt;</comment>
                            <comment id="40040" author="spitzcor" created="Tue, 5 Jun 2012 16:17:28 +0000"  >&lt;p&gt;That&apos;s what I was noticing, the lack of &quot;Build Master&quot; messages.  I agree, good riddance.  So, it&apos;d be nice if folks could at least manually update tickets when patches are available for review and when they&apos;ve landed.&lt;/p&gt;</comment>
                            <comment id="40043" author="pjones" created="Tue, 5 Jun 2012 16:29:14 +0000"  >&lt;p&gt;Yes, folk are meant to manually update links to patches and then mark tickets resolve with the release the fix is landed for, but like any manual process there can be delays or oversights. I fix them when I spot them (and I do extensively check prior to a release going out). We also hope to add more useful automated updates from gerrit at some point.&lt;/p&gt;</comment>
                            <comment id="40535" author="nedbass" created="Wed, 13 Jun 2012 19:40:08 +0000"  >&lt;p&gt;Hi Jinshan,&lt;/p&gt;

&lt;p&gt;We got this report of deadlocked processes on Lustre clients that do &lt;b&gt;not&lt;/b&gt; have your patch applied.  The stacktraces make me think it is this bug since they involve clio, page fault, and signals.  However we have not seen this exact symptom before where the threads are deadlocked on a semaphore, so I wanted to confirm with you.  Thanks&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;PID: 101853  TASK: ffff88078f41aaa0  CPU: 1   COMMAND: &quot;calcLeakage&quot;
 #0 [ffff8807bc62d808] schedule at ffffffff814eed92
 #1 [ffff8807bc62d8d0] schedule_timeout at ffffffff814efc55
 #2 [ffff8807bc62d980] __down at ffffffff814f0b72
 #3 [ffff8807bc62d9d0] down at ffffffff81096db1
 #4 [ffff8807bc62da00] ll_inode_size_lock at ffffffffa09a78c2 [lustre]
 #5 [ffff8807bc62da30] ccc_prep_size at ffffffffa09d6788 [lustre]
 #6 [ffff8807bc62da90] vvp_io_fault_start at ffffffffa09db25b [lustre]
 #7 [ffff8807bc62db00] cl_io_start at ffffffffa057fcaa [obdclass]
 #8 [ffff8807bc62db40] cl_io_loop at ffffffffa05832e4 [obdclass]
 #9 [ffff8807bc62db90] ll_fault0 at ffffffffa09c4d44 [lustre]
#10 [ffff8807bc62dbf0] ll_fault at ffffffffa09c5008 [lustre]
#11 [ffff8807bc62dc40] __do_fault at ffffffff8113b464
#12 [ffff8807bc62dcd0] handle_pte_fault at ffffffff8113ba17
#13 [ffff8807bc62ddb0] handle_mm_fault at ffffffff8113c654
#14 [ffff8807bc62de00] __do_page_fault at ffffffff81042b79
#15 [ffff8807bc62df20] do_page_fault at ffffffff814f481e
#16 [ffff8807bc62df50] page_fault at ffffffff814f1bd5
    RIP: 00000000005381ce  RSP: 00007fffffffc1e0  RFLAGS: 00010246
    RAX: 00000000005c02f0  RBX: 00000000004acf50  RCX: 00000000009f2b20
    RDX: 00007fffffffd180  RSI: 00000000009f2a50  RDI: 00000000009f2a40
    RBP: 00007fffffffcc30   R8: 0000000000000001   R9: 0000000000000001
    R10: 00000000ffffbfff  R11: 00002aaaac27eea0  R12: 0000000000000000
    R13: 0000000000000006  R14: 0000000000000003  R15: 0000000000000028
    ORIG_RAX: ffffffffffffffff  CS: 0033  SS: 002b

PID: 101854  TASK: ffff8807be4d0ae0  CPU: 12  COMMAND: &quot;calcLeakage&quot;
 #0 [ffff8807b24b5ad0] schedule at ffffffff814eed92
 #1 [ffff8807b24b5b98] rwsem_down_failed_common at ffffffff814f1345
 #2 [ffff8807b24b5bf8] rwsem_down_read_failed at ffffffff814f14d6
 #3 [ffff8807b24b5c38] call_rwsem_down_read_failed at ffffffff81278a24
 #4 [ffff8807b24b5ca0] exit_mm at ffffffff8106e34c
 #5 [ffff8807b24b5ce0] do_exit at ffffffff8106e7ef
 #6 [ffff8807b24b5d60] do_group_exit at ffffffff8106ef48
 #7 [ffff8807b24b5d90] get_signal_to_deliver at ffffffff81084636
 #8 [ffff8807b24b5e30] do_signal at ffffffff8100a2d5
 #9 [ffff8807b24b5f30] do_notify_resume at ffffffff8100aaf0
#10 [ffff8807b24b5f50] int_signal at ffffffff8100b3c1
    RIP: 00002aaaac204613  RSP: 00002aaab2807df0  RFLAGS: 00000293
    RAX: fffffffffffffdfc  RBX: 0000000000981508  RCX: ffffffffffffffff
    RDX: 000000000000006e  RSI: 0000000000000002  RDI: 00002aaab2807e50
    RBP: 0000000000000009   R8: 0000000000000000   R9: 00002aaab2808700
    R10: 00002aaab2807b30  R11: 0000000000000293  R12: 0000000000000008
    R13: 000000000000006e  R14: 00002aaab2807e50  R15: 0000000000000000
    ORIG_RAX: 0000000000000007  CS: 0033  SS: 002b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="40625" author="jay" created="Fri, 15 Jun 2012 01:07:28 +0000"  >&lt;p&gt;clio has limited access to mmap_sem so I can&apos;t figure out any cases causing deadlock this way. However, kernel acquires mmap_sem to enter into fault routine so I&apos;ll take a look at kernel code to see if it&apos;s possible to leave mmap_sem unlocked and then finally cause deadlock to itself.&lt;/p&gt;</comment>
                            <comment id="40637" author="hongchao.zhang" created="Fri, 15 Jun 2012 05:12:00 +0000"  >&lt;p&gt;Hi Jay,&lt;br/&gt;
the following two URLs are also suspected to deadlock for mmap_sem, could you please help to check? Thanks!&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/d52af4d8-b67f-11e1-9e3f-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/d52af4d8-b67f-11e1-9e3f-52540035b04c&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_logs/9773fdb4-b680-11e1-9e3f-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_logs/9773fdb4-b680-11e1-9e3f-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="40642" author="jay" created="Fri, 15 Jun 2012 06:41:57 +0000"  >&lt;p&gt;Hi Hongchao, it looks like the problem you mentioned is imported by commit 60b5c3e464d6b4b333506e6db6b0635bb5a06577 (job stats). Obviously it was deadlocked at mmap_sem because do_mmap_pgoff() has already held write mode lock, so I don&apos;t think this is the same problem described by Ned. Can you please file a new ticket and ask Niu to take a look?&lt;/p&gt;</comment>
                            <comment id="42504" author="nedbass" created="Tue, 31 Jul 2012 13:11:10 +0000"  >&lt;p&gt;There seems to be significant churn in the b2_1 patch at &lt;a href=&quot;http://review.whamcloud.com/2574&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/2574&lt;/a&gt; . Since the master patch has already been merged, please consider creating new jira issues and review requests for ongoing 2.1 development.  This would help us follow your work more easily.  Thanks.&lt;/p&gt;</comment>
                            <comment id="45663" author="jlevi" created="Thu, 27 Sep 2012 17:45:16 +0000"  >&lt;p&gt;Please reopen or create new tickets as appropriate if more work is needed.&lt;/p&gt;</comment>
                            <comment id="45678" author="nedbass" created="Thu, 27 Sep 2012 18:28:17 +0000"  >&lt;p&gt;Patch for 2.1 still in review.  Reopening.&lt;/p&gt;</comment>
                            <comment id="57878" author="adilger" created="Wed, 8 May 2013 02:38:36 +0000"  >&lt;p&gt;b2_1 patch was landed for 2.1.4, master patch was landed for 2.3.0.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="11215" name="test" size="1048576" author="nedbass" created="Fri, 20 Apr 2012 13:34:32 +0000"/>
                    </attachments>
                <subtasks>
                            <subtask id="14208">LU-1356</subtask>
                            <subtask id="14207">LU-1355</subtask>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv6kn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4599</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>