<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:20:00 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1823] sanity/103: slab corruption</title>
                <link>https://jira.whamcloud.com/browse/LU-1823</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Lustre: DEBUG MARKER: == sanity test 103: acl test ========================================================================= 19:57:07 (1346774227)&lt;br/&gt;
/work/lustre/head/clean/lustre/utils/l_getidentity&lt;br/&gt;
Slab corruption (Tainted: P           ---------------   ): size-2048 start=dac6c470, len=2048&lt;br/&gt;
Redzone: 0x9f911029d74e35b/0x9f911029d74e35b.&lt;br/&gt;
Last user: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;dff39e58&amp;gt;&amp;#93;&lt;/span&gt;(cfs_free+0x8/0x10 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;)&lt;br/&gt;
310: 02 00 00 00 01 00 07 00 ff ff ff ff 02 00 05 00&lt;br/&gt;
320: 01 00 00 00 02 00 07 00 02 00 00 00 04 00 07 00&lt;br/&gt;
330: ff ff ff ff 10 00 07 00 ff ff ff ff 20 00 05 00&lt;br/&gt;
340: ff ff ff ff 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b&lt;br/&gt;
Next obj: start=dac6cc88, len=2048&lt;br/&gt;
Redzone: 0x9f911029d74e35b/0x9f911029d74e35b.&lt;br/&gt;
Last user: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;dff39e58&amp;gt;&amp;#93;&lt;/span&gt;(cfs_free+0x8/0x10 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;)&lt;br/&gt;
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b&lt;br/&gt;
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b&lt;/p&gt;



&lt;p&gt;02000000:00000010:1.0:1346774231.327841:1804:3373:0:(sec_null.c:217:null_alloc_repbuf()) kmalloced &apos;req-&amp;gt;rq_repbuf&apos;: 2048 at dac6c470.&lt;br/&gt;
...&lt;/p&gt;

&lt;p&gt;02000000:00000010:1.0:1346774231.328361:836:3373:0:(sec_null.c:231:null_free_repbuf()) kfreed &apos;req-&amp;gt;rq_repbuf&apos;: 2048 at dac6c470.&lt;/p&gt;</description>
                <environment>CONFIG_DEBUG_SLAB=y</environment>
        <key id="15675">LU-1823</key>
            <summary>sanity/103: slab corruption</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="green">Oleg Drokin</assignee>
                                    <reporter username="bzzz">Alex Zhuravlev</reporter>
                        <labels>
                    </labels>
                <created>Tue, 4 Sep 2012 13:00:39 +0000</created>
                <updated>Mon, 29 May 2017 03:35:02 +0000</updated>
                            <resolved>Thu, 13 Sep 2012 01:39:22 +0000</resolved>
                                    <version>Lustre 2.3.0</version>
                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.3.0</fixVersion>
                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>10</watches>
                                                                            <comments>
                            <comment id="44149" author="pjones" created="Tue, 4 Sep 2012 14:23:38 +0000"  >&lt;p&gt;Keith is going to try and reproduce this with a debug kernel&lt;/p&gt;</comment>
                            <comment id="44150" author="keith" created="Tue, 4 Sep 2012 14:27:25 +0000"  >&lt;p&gt;Are there more logs from the rest of the systems? Is there anything special needed to reproduce this?  &lt;/p&gt;</comment>
                            <comment id="44151" author="bzzz" created="Tue, 4 Sep 2012 14:32:47 +0000"  >&lt;p&gt;I&apos;m able to reproduce this almost 100% with REFORMAT=y ONLY=103 sh sanity.sh, within single vbox instance.&lt;/p&gt;

</comment>
                            <comment id="44176" author="adilger" created="Tue, 4 Sep 2012 17:56:44 +0000"  >&lt;p&gt;I know in the past, Oleg, Johann, and I have wanted to run early development kernels with various debug options enabled for all kernel builds, so that this kind of problem can be flushed out when patches land instead of only at users who run these debug kernels, or hitting silent corruption problems.  This previously was suggested by Johann in TT-359, but I think it could be done with a patch to the kernel config options during the development cycle instead of via the test environment (which would need more effort/complexity).&lt;/p&gt;

&lt;p&gt;I think since we are early in the 2.4 release cycle that it makes sense to enable these config options for all our server kernels (so they will be seen for servers and clients running the server kernel).  We can leave this as a blocker bug for the 2.4 release to remember to revert the debug kernel config changes.&lt;/p&gt;

&lt;p&gt;Given the relatively small number of patches that have landed on master compared to 2.3, it probably also makes sense to submit a patch to b2_3 to enable CONFIG_DEBUG_SLAB, CONFIG_DEBUG_SPINLOCK and possibly some others, with:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Test-Parameters: fortestonly testgroup=full
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;to see if there is a similar failure for b2_3.&lt;/p&gt;</comment>
                            <comment id="44185" author="bzzz" created="Wed, 5 Sep 2012 00:31:32 +0000"  >&lt;p&gt;probably it&apos;d make sense to add a trivial (dmesg|grep &apos;Slab corruption&apos; &amp;amp;&amp;amp; error) to t-f so that we don&apos;t miss it.&lt;/p&gt;</comment>
                            <comment id="44189" author="keith" created="Wed, 5 Sep 2012 03:54:11 +0000"  >&lt;p&gt;I submitted a few config changes for b2_3 as suggested.  &lt;a href=&quot;http://review.whamcloud.com/3875&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/3875&lt;/a&gt; .   &lt;/p&gt;

&lt;p&gt;I have been able to get some local testing done today. I tried an older 2.2.59 code base (I had it setup on one of my build servers) and I don&apos;t seem to see the problem there but I do see it with Master.  I will work to narrow down the window of possible changes. Sorting out if b2_3 is affected is my next step. &lt;/p&gt;</comment>
                            <comment id="44219" author="keith" created="Wed, 5 Sep 2012 15:14:29 +0000"  >&lt;p&gt;My config test didn&apos;t make it through the build on the first pass but Yu has a very nice patch/test here I am watching &lt;a href=&quot;http://review.whamcloud.com/#change,3876&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3876&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="44262" author="yujian" created="Wed, 5 Sep 2012 23:44:58 +0000"  >&lt;p&gt;Hi Keith,&lt;/p&gt;

&lt;p&gt;By using the build &lt;a href=&quot;http://build.whamcloud.com/job/lustre-reviews/8904/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-reviews/8904/&lt;/a&gt; in &lt;a href=&quot;http://review.whamcloud.com/#change,3876&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3876&lt;/a&gt;, I can manually reproduce the slab corruption issue on RHEL6 distro by only running sanity test 103:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/2c479ade-f7d3-11e1-8b95-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/2c479ade-f7d3-11e1-8b95-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The autotest run for the above build skipped sanity test 103 because it&apos;s in the EXCEPT_SLOW list. I&apos;m updating the commit message to add SLOW=yes into the test parameters.&lt;/p&gt;</comment>
                            <comment id="44292" author="yujian" created="Thu, 6 Sep 2012 11:08:20 +0000"  >&lt;p&gt;Hi Keith,&lt;/p&gt;

&lt;p&gt;FYI, with the build for patch set 5 of &lt;a href=&quot;http://review.whamcloud.com/#change,3876&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3876&lt;/a&gt;, I reproduced the issue with PTLDEBUG=-1 manually:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/59a5ca46-f832-11e1-b114-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/59a5ca46-f832-11e1-b114-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="44320" author="adilger" created="Thu, 6 Sep 2012 18:02:14 +0000"  >&lt;p&gt;If there are no obvious sources of this corruption, it probably makes sense to submit this test patch as several separate changes, each based on one of the recent 2.2.* tags, to see if we can isolate when this corruption started.  After that, it is hopefully possible to do a (manual?) git-bisect to find which patch is the culprit, or at least narrow down the range of patches that need to be examined manually.  It is also important to check in each of the failure cases what node type the corruption is seen on (MDS, OSS, client), since that will also reduce the number of changes which might have introduced the problem.&lt;/p&gt;

&lt;p&gt;It would make sense to include a check for the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1844&quot; title=&quot;sanityn, subtest test_16: list_del corruption when run ofd + ldiskfs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1844&quot;&gt;&lt;del&gt;LU-1844&lt;/del&gt;&lt;/a&gt; list_add/list_del corruption messages as well, since I suspect that is also a sign of random memory corruption.&lt;/p&gt;</comment>
                            <comment id="44323" author="keith" created="Thu, 6 Sep 2012 18:55:47 +0000"  >&lt;p&gt;I have started a git bisect to narrow down the code change but I fear it is not reliable data.  I am not sure what has happened on my local vms (I shuffled some vms around yesterday) but I am no longer able to reproduce the core issue.   I am running Lustre: 2.3.50 (from Master) with kernel-2.6.32-279.5.2 and not triggering the issue.  I am moving back to kernel-2.6.32-279.1.1 (confirmed failed with Yu&apos;s test run) to see if the issue reappears. &lt;/p&gt;

&lt;p&gt;I will update when I know more. &lt;/p&gt;</comment>
                            <comment id="44340" author="yujian" created="Thu, 6 Sep 2012 23:36:25 +0000"  >&lt;p&gt;Per the above test report, the slab corruption issue occurred only on the MDS (fat-intel-2):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;fat-intel-2: Slab corruption (Not tainted): size-2048 start=ffff8802e1b534f8, len=2048
fat-intel-2: Slab corruption (Not tainted): size-2048 start=ffff8802e1d776f8, len=2048
fat-intel-2: Slab corruption (Not tainted): size-2048 start=ffff8802e13ca4c8, len=2048
 sanity test_103: @@@@@@ FAIL: slab corruption detected 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="44343" author="keith" created="Fri, 7 Sep 2012 01:16:55 +0000"  >&lt;p&gt;Moving kernels does not seem to reproduce the issue so it is not a lead.  I am going to try some client nodes tomorrow.   I saw the error on the MDS as well on my initial Master run but have not seen it since. &lt;/p&gt;</comment>
                            <comment id="44451" author="keith" created="Sun, 9 Sep 2012 17:21:27 +0000"  >&lt;p&gt;I acquired some torro nodes today and am starting to set up.  My mds vm crashed while running &quot; REFORMAT=y ONLY=103 sh sanity.sh&quot; (it took about 30 hours to trigger).  This could be the bad cfs_free path that is corrupting the slab. &lt;/p&gt;

&lt;p&gt;I will try and attach the whole dmesg. &lt;/p&gt;

&lt;p&gt;This was master +  kernel-2.6.32-279 on the MDS vm node. &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt; 
 Lustre: DEBUG MARKER: == sanity test 103: acl test =========================================== 06:06:43 (1347109603)
kfree_debugcheck: out of range ptr 6000100000002h.
------------[ cut here ]------------
kernel BUG at mm/slab.c:2911!
invalid opcode: 0000 [#1] SMP
last sysfs file: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A03:00/PNP0C0A:00/power_supply/BAT0/energy_full
CPU 0
Modules linked in: cmm(U) osd_ldiskfs(U) mdt(U) mdd(U) mds(U) fsfilt_ldiskfs(U) exportfs mgs(U) mgc(U) ldiskfs(U) lustre(U) lquota(U) lov(U) osc(U) mdc(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) ksocklnd(U) lnet(U) sha512_generic sha256_generic libcfs(U) autofs4 sunrpc ipv6 ppdev parport_pc parport microcode i2c_piix4 i2c_core snd_intel8x0 snd_ac97_codec ac97_bus snd_seq snd_seq_device snd_pcm snd_timer snd soundcore snd_page_alloc e1000 sg ext4 mbcache jbd2 sd_mod crc_t10dif sr_mod cdrom ahci pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: speedstep_lib]

Pid: 24218, comm: jbd2/dm-2-8 Not tainted 2.6.32.masterDEBUG11A #1 innotek GmbH VirtualBox
RIP: 0010:[&amp;lt;ffffffff81162530&amp;gt;]  [&amp;lt;ffffffff81162530&amp;gt;] kfree_debugcheck+0x30/0x40
RSP: 0018:ffff88002733dba0  EFLAGS: 00010082
RAX: 0000000000000039 RBX: 0006000100000002 RCX: 0000000000007a74
RDX: 0000000000000000 RSI: 0000000000000046 RDI: 0000000000000046
RBP: ffff88002733dbb0 R08: 0000000000000000 R09: ffffffff8163acc0
R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000202
R13: 0006000100000002 R14: ffff880024d9d298 R15: ffff880024d9d298
FS:  0000000000000000(0000) GS:ffff880002200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000003ac2ef5170 CR3: 000000003d0e0000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process jbd2/dm-2-8 (pid: 24218, threadinfo ffff88002733c000, task ffff88003d640ae0)
Stack:
 ffff880000000020 ffffffffa035ebae ffff88002733dc00 ffffffff8116594b
&amp;lt;d&amp;gt; ffff88002851f720 ffff88003f810080 ffff88002733dc20 0006000100000002
&amp;lt;d&amp;gt; ffff880024d9d240 0000000000000000 ffff880024d9d298 ffff880024d9d298
Call Trace:
 [&amp;lt;ffffffffa035ebae&amp;gt;] ? cfs_free+0xe/0x10 [libcfs]
 [&amp;lt;ffffffff8116594b&amp;gt;] kfree+0x5b/0x2a0
 [&amp;lt;ffffffffa035ebae&amp;gt;] cfs_free+0xe/0x10 [libcfs]
 [&amp;lt;ffffffffa04ceb73&amp;gt;] lu_global_key_fini+0xa3/0xf0 [obdclass]
 [&amp;lt;ffffffffa04cf380&amp;gt;] key_fini+0x60/0x190 [obdclass]
 [&amp;lt;ffffffffa04cf4df&amp;gt;] keys_fini+0x2f/0x120 [obdclass]
 [&amp;lt;ffffffffa04cf5fd&amp;gt;] lu_context_fini+0x2d/0xc0 [obdclass]
 [&amp;lt;ffffffffa0b86aa2&amp;gt;] osd_trans_commit_cb+0xe2/0x2b0 [osd_ldiskfs]
 [&amp;lt;ffffffffa0a3f21a&amp;gt;] ldiskfs_journal_commit_callback+0x8a/0xc0 [ldiskfs]
 [&amp;lt;ffffffffa00a18af&amp;gt;] jbd2_journal_commit_transaction+0x110f/0x1530 [jbd2]
 [&amp;lt;ffffffff810096f0&amp;gt;] ? __switch_to+0xd0/0x320
 [&amp;lt;ffffffff8107eabb&amp;gt;] ? try_to_del_timer_sync+0x7b/0xe0
 [&amp;lt;ffffffffa00a7128&amp;gt;] kjournald2+0xb8/0x220 [jbd2]
 [&amp;lt;ffffffff81091d66&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff81091cd0&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
Code: 48 83 ec 08 0f 1f 44 00 00 48 89 fb e8 7a 67 ee ff 84 c0 74 07 48 83 c4 08 5b c9 c3 48 89 de 48 c7 c7 c8 0b 7a 81 e8 ed cc 39 00 &amp;lt;0f&amp;gt; 0b eb fe 66 66 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 41
RIP  [&amp;lt;ffffffff81162530&amp;gt;] kfree_debugcheck+0x30/0x40
 RSP &amp;lt;ffff88002733dba0&amp;gt;
---[ end trace ff4011ce2a20c79c ]---
Kernel panic - not syncing: Fatal exception
Pid: 24218, comm: jbd2/dm-2-8 Tainted: G      D    ---------------    2.6.32.masterDEBUG11A #1
Call Trace:
 [&amp;lt;ffffffff814ff155&amp;gt;] ? panic+0xa0/0x168
 [&amp;lt;ffffffff815032e4&amp;gt;] ? oops_end+0xe4/0x100
 [&amp;lt;ffffffff8100f26b&amp;gt;] ? die+0x5b/0x90
 [&amp;lt;ffffffff81502bb4&amp;gt;] ? do_trap+0xc4/0x160
 [&amp;lt;ffffffff8100ce35&amp;gt;] ? do_invalid_op+0x95/0xb0
 [&amp;lt;ffffffff81162530&amp;gt;] ? kfree_debugcheck+0x30/0x40
 [&amp;lt;ffffffffa036def3&amp;gt;] ? libcfs_debug_vmsg2+0x4e3/0xb60 [libcfs]
 [&amp;lt;ffffffff8100bedb&amp;gt;] ? invalid_op+0x1b/0x20
 [&amp;lt;ffffffff81162530&amp;gt;] ? kfree_debugcheck+0x30/0x40
 [&amp;lt;ffffffffa035ebae&amp;gt;] ? cfs_free+0xe/0x10 [libcfs]
 [&amp;lt;ffffffff8116594b&amp;gt;] ? kfree+0x5b/0x2a0
 [&amp;lt;ffffffffa035ebae&amp;gt;] ? cfs_free+0xe/0x10 [libcfs]
 [&amp;lt;ffffffffa04ceb73&amp;gt;] ? lu_global_key_fini+0xa3/0xf0 [obdclass]
 [&amp;lt;ffffffffa04cf380&amp;gt;] ? key_fini+0x60/0x190 [obdclass]
 [&amp;lt;ffffffffa04cf4df&amp;gt;] ? keys_fini+0x2f/0x120 [obdclass]
 [&amp;lt;ffffffffa04cf5fd&amp;gt;] ? lu_context_fini+0x2d/0xc0 [obdclass]
 [&amp;lt;ffffffffa0b86aa2&amp;gt;] ? osd_trans_commit_cb+0xe2/0x2b0 [osd_ldiskfs]
 [&amp;lt;ffffffffa0a3f21a&amp;gt;] ? ldiskfs_journal_commit_callback+0x8a/0xc0 [ldiskfs]
 [&amp;lt;ffffffffa00a18af&amp;gt;] ? jbd2_journal_commit_transaction+0x110f/0x1530 [jbd2]
 [&amp;lt;ffffffff810096f0&amp;gt;] ? __switch_to+0xd0/0x320
 [&amp;lt;ffffffff8107eabb&amp;gt;] ? try_to_del_timer_sync+0x7b/0xe0
 [&amp;lt;ffffffffa00a7128&amp;gt;] ? kjournald2+0xb8/0x220 [jbd2]
 [&amp;lt;ffffffff810920d0&amp;gt;] ? autoremove_wake_function+0x0/0x40
 [&amp;lt;ffffffffa00a7070&amp;gt;] ? kjournald2+0x0/0x220 [jbd2]
 [&amp;lt;ffffffff81091d66&amp;gt;] ? kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c14a&amp;gt;] ? child_rip+0xa/0x20
 [&amp;lt;ffffffff81091cd0&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="44452" author="keith" created="Sun, 9 Sep 2012 17:23:14 +0000"  >&lt;p&gt;Keith local vm MDS panic -v1 dmesg&lt;/p&gt;</comment>
                            <comment id="44478" author="yujian" created="Mon, 10 Sep 2012 09:02:38 +0000"  >&lt;p&gt;Hi Keith,&lt;/p&gt;

&lt;p&gt;I created several test patches per the following comments from Andreas:&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;If there are no obvious sources of this corruption, it probably makes sense to submit this test patch as several separate changes, each based on one of the recent 2.2.* tags, to see if we can isolate when this corruption started.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Patch on tag 2.2.94: &lt;a href=&quot;http://review.whamcloud.com/#change,3921&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3921&lt;/a&gt;&lt;br/&gt;
Patch on tag 2.3.50: &lt;a href=&quot;http://review.whamcloud.com/#change,3918&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3918&lt;/a&gt;&lt;br/&gt;
Patch on tag 2.2.93: &lt;a href=&quot;http://review.whamcloud.com/#change,3919&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3919&lt;/a&gt;&lt;br/&gt;
Patch on tag 2.2.92: &lt;a href=&quot;http://review.whamcloud.com/#change,3920&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3920&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Hope we can isolate the issue.&lt;/p&gt;</comment>
                            <comment id="44525" author="adilger" created="Mon, 10 Sep 2012 14:18:27 +0000"  >&lt;p&gt;Keith, can you please fix Yu Jian&apos;s patches that hit build failures.&lt;/p&gt;

&lt;p&gt;The 2.3.50 patch failed to build due to built-in version checks, so it needs to be rebased one patch later (git hash 388111848489ef99b1fa31ce8fef255ab9c08e84).  I haven&apos;t investigated the other failure, but hopefully it is similarly trivial.  Please get to this ASAP so that the testing can be started on these patches, and hopefully we can isolate this serious defect more quickly.&lt;/p&gt;



</comment>
                            <comment id="44539" author="green" created="Mon, 10 Sep 2012 17:26:14 +0000"  >&lt;p&gt;Ok, I enabled debug pagealloc and caught the offender I think:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[ 3201.251050] Lustre: 9532:0:(mdt_lproc.c:418:lprocfs_wr_identity_upcall()) lus
tre-MDT0000: identity upcall set to /home/green/git/lustre-release/lustre/utils/
l_getidentity
[ 3204.127208] BUG: unable to handle kernel paging request at ffff88021851ab48
[ 3204.129164] IP: [&amp;lt;ffffffff811d9dd8&amp;gt;] posix_acl_to_xattr+0x28/0x80
[ 3204.130098] PGD 1a26063 PUD 2766067 PMD 2829067 PTE 800000021851a160
[ 3204.130673] Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
[ 3204.130673] last sysfs file: /sys/devices/system/cpu/possible
[ 3204.130673] CPU 1
[ 3204.130673] Modules linked in: lustre obdfilter ost cmm mdt osd_ldiskfs fsfil
t_ldiskfs ldiskfs mdd mds mgs lquota obdecho mgc lov osc mdc lmv fid fld ptlrpc obdclass lvfs ksocklnd lnet libcfs ext2 exportfs jbd sha512_generic sha256_gener
ic sunrpc ipv6 microcode virtio_balloon virtio_net i2c_piix4 i2c_core ext4 mbcac
he jbd2 virtio_blk virtio_pci virtio_ring virtio pata_acpi ata_generic ata_piix 
dm_mirror dm_region_hash dm_log dm_mod [last unloaded: libcfs]
[ 3204.130673]
[ 3204.130673] Pid: 8603, comm: mdt00_001 Not tainted 2.6.32-debug #3 Bochs Boch
s
[ 3204.130673] RIP: 0010:[&amp;lt;ffffffff811d9dd8&amp;gt;]  [&amp;lt;ffffffff811d9dd8&amp;gt;] posix_acl_to
_xattr+0x28/0x80
[ 3204.130673] RSP: 0018:ffff88027a9459d0  EFLAGS: 00010287
[ 3204.130673] RAX: 0000000000000034 RBX: ffff88021851ab48 RCX: 0000000000000034
[ 3204.130673] RDX: 0000000000000060 RSI: ffff88021851ab48 RDI: ffff88023a694e50
[ 3204.130673] RBP: ffff88027a9459d0 R08: ffffffffa055f3cd R09: ffffffffa05f8e36
[ 3204.130673] R10: 0000000000000000 R11: ffffffffa05fee98 R12: ffff88023a694e50
[ 3204.130673] R13: 0000000000000060 R14: ffffffffa055f3cd R15: ffff8802470b5b98
[ 3204.130673] FS:  00007fcc8f724700(0000) GS:ffff880028240000(0000) knlGS:0000000000000000
[ 3204.130673] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 3204.130673] CR2: ffff88021851ab48 CR3: 00000002653cb000 CR4: 00000000000006e0
[ 3204.130673] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 3204.130673] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 3204.130673] &lt;span class=&quot;code-object&quot;&gt;Process&lt;/span&gt; mdt00_001 (pid: 8603, threadinfo ffff88027a944000, task ffff88027a942540)
[ 3204.130673] Stack:
[ 3204.130673]  ffff88027a945a10 ffffffffa05ac598 ffff88027a9459f0 ffffffffa067b661
[ 3204.130673] &amp;lt;d&amp;gt; 0000000000000010 ffff880222a70ef0 ffff880202a25bb8 ffff88027848b928
[ 3204.130673] &amp;lt;d&amp;gt; ffff88027a945a20 ffffffffa05ac612 ffff88027a945a30 ffffffff811a0147
[ 3204.130673] Call Trace:
[ 3204.130673]  [&amp;lt;ffffffffa05ac598&amp;gt;] ldiskfs_xattr_get_acl+0x78/0xa0 [ldiskfs]
[ 3204.130673]  [&amp;lt;ffffffffa067b661&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
[ 3204.130673]  [&amp;lt;ffffffffa05ac612&amp;gt;] ldiskfs_xattr_get_acl_access+0x22/0x30 [ldiskfs]
[ 3204.130673]  [&amp;lt;ffffffff811a0147&amp;gt;] generic_getxattr+0x87/0x90
[ 3204.130673]  [&amp;lt;ffffffffa074af64&amp;gt;] osd_xattr_get+0x284/0x360 [osd_ldiskfs]
[ 3204.130673]  [&amp;lt;ffffffffa05356d0&amp;gt;] mdd_attr_set+0x15b0/0x2250 [mdd]
[ 3204.130673]  [&amp;lt;ffffffffa067b661&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
[ 3204.130673]  [&amp;lt;ffffffffa0888a26&amp;gt;] cml_attr_set+0x66/0x1a0 [cmm]
[ 3204.130673]  [&amp;lt;ffffffffa07bc768&amp;gt;] mdt_attr_set+0x2a8/0x590 [mdt]
[ 3204.130673]  [&amp;lt;ffffffffa07bd096&amp;gt;] mdt_reint_setattr+0x646/0x13f0 [mdt]
[ 3204.130673]  [&amp;lt;ffffffffa067b661&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
[ 3204.130673]  [&amp;lt;ffffffffa07b6081&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
[ 3204.130673]  [&amp;lt;ffffffffa07af42a&amp;gt;] mdt_reint_internal+0x50a/0x810 [mdt]
[ 3204.130673]  [&amp;lt;ffffffffa07af774&amp;gt;] mdt_reint+0x44/0xe0 [mdt]
[ 3204.130673]  [&amp;lt;ffffffffa07a1d42&amp;gt;] mdt_handle_common+0x922/0x1740 [mdt]
[ 3204.130673]  [&amp;lt;ffffffffa07a2c35&amp;gt;] mdt_regular_handle+0x15/0x20 [mdt]
[ 3204.130673]  [&amp;lt;ffffffffa0f7486f&amp;gt;] ptlrpc_server_handle_request+0x44f/0xee0 [ptlrpc]
[ 3204.130673]  [&amp;lt;ffffffffa066b66e&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
[ 3204.130673]  [&amp;lt;ffffffffa0f6d711&amp;gt;] ? ptlrpc_wait_event+0xb1/0x2a0 [ptlrpc]
[ 3204.130673]  [&amp;lt;ffffffffa067b661&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
[ 3204.130673]  [&amp;lt;ffffffff81051f73&amp;gt;] ? __wake_up+0x53/0x70
[ 3204.130673]  [&amp;lt;ffffffffa0f773de&amp;gt;] ptlrpc_main+0xaee/0x1800 [ptlrpc]
[ 3204.130673]  [&amp;lt;ffffffffa0f768f0&amp;gt;] ? ptlrpc_main+0x0/0x1800 [ptlrpc]
[ 3204.130673]  [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
[ 3204.130673]  [&amp;lt;ffffffffa0f768f0&amp;gt;] ? ptlrpc_main+0x0/0x1800 [ptlrpc]
[ 3204.130673]  [&amp;lt;ffffffffa0f768f0&amp;gt;] ? ptlrpc_main+0x0/0x1800 [ptlrpc]
[ 3204.130673]  [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
[ 3204.130673] Code: c3 90 90 55 48 89 e5 0f 1f 44 00 00 48 63 47 04 48 85 f6 8d 04 c5 04 00 00 00 75 07 c9 c3 0f 1f 44 00 00 48 63 c8 48 39 d1 77 48 &amp;lt;c7&amp;gt; 06 02 00 00 00 8b 57 04 85 d2 74 e4 48 83 c6 04 31 d2 0f 1f
[ 3204.130673] RIP  [&amp;lt;ffffffff811d9dd8&amp;gt;] posix_acl_to_xattr+0x28/0x80
[ 3204.130673]  RSP &amp;lt;ffff88027a9459d0&amp;gt;
[ 3204.130673] CR2: ffff88021851ab48
[ 3204.130673] ---[ end trace f9aa2ef75b2f20bf ]---
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="44545" author="green" created="Mon, 10 Sep 2012 19:50:33 +0000"  >&lt;p&gt;Ok, my current theory is we have a problem coming from patch in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-998&quot; title=&quot;Test failure on test suite sanity, subtest test_103&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-998&quot;&gt;&lt;del&gt;LU-998&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;mdd_declare_attr_set:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (ma-&amp;gt;ma_attr.la_valid &amp;amp; LA_MODE) {
                mdd_read_lock(env, obj, MOR_TGT_CHILD);
                rc = mdo_xattr_get(env, obj, buf, XATTR_NAME_ACL_ACCESS,
                                   BYPASS_CAPA);
                mdd_read_unlock(env, obj);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;here we&apos;ll just use whatever happens to be in buf which sometimes ends up as garbage.&lt;/p&gt;

&lt;p&gt;I am not exactly clear on how it&apos;s supposed to be used, but it looks like the buffer should be reinitialized every time it&apos;s about to be used afresh (frankly I am not even sure why is it needed because it seems to be always duplicated in some local vars that we pass around). Other cases in that same function do initialize the buffer address.&lt;/p&gt;

&lt;p&gt;My reading of the code is such that we are just trying to make sure the xattrs exist at all, but if the buf happens to contain some pointer from before and declared size is big enough to fit the xattr, we read them to the buffer overwriting whatever important data was there (and it seems it frequently points to freed and then possibly reallocated data too).&lt;/p&gt;

&lt;p&gt;The fix for this particular problem is to probably pass in &amp;amp;LU_BUF_NULL instead of buf argument. (patch at &lt;a href=&quot;http://review.whamcloud.com/3928&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/3928&lt;/a&gt; and seems to be holding well in my testing)&lt;br/&gt;
The wider question if the buf is supposed to point to freed memory at any time still remains to be open and I need somebody&apos;s help to determine this.&lt;br/&gt;
If that is not supposed to happen, we can track down offending user (and I imagine it&apos;s probably not super valid because occasionally on shutdown we do try to free that memory that buffer points to and if it&apos;s invalid, that&apos;s a problem, even though nobody has ever seen this before for some reason).&lt;/p&gt;</comment>
                            <comment id="44553" author="keith" created="Mon, 10 Sep 2012 21:19:15 +0000"  >&lt;p&gt;Oleg are you testing with DEBUG_SLAB as well?   child_rip is the only thing that seems common there is no mention of the slab code path and cfs_free in your back trace.  &lt;/p&gt;

&lt;p&gt;Can you confirm that the slab errors have gone away with your patch?  I have put your patch in my test queue on the bisect setup as well. &lt;/p&gt;
</comment>
                            <comment id="44554" author="keith" created="Mon, 10 Sep 2012 21:20:20 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Keith, can you please fix Yu Jian&apos;s patches that hit build failures.

The 2.3.50 patch failed to build due to built-in version checks, so it needs to be rebased one patch later (git hash 388111848489ef99b1fa31ce8fef255ab9c08e84).  I haven&apos;t investigated the other failure, but hopefully it is similarly trivial.  Please get to this ASAP so that the testing can be started on these patches, and hopefully we can isolate this serious defect more quickly.

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;I have started a fresh build/bisect setup with client-7 client-17 client-18.  I am able to reliably re-create here.  This is the exact build that does not trigger on my system and the same config changes as Yu&apos;s.  I fear that running on random setups (via autotest) may lead to false negatives since some setups produce the error and some don&apos;t.  I will kick the builds along but I would not trust any single run that passes as &quot;Not Broken&quot;. &lt;/p&gt;</comment>
                            <comment id="44555" author="green" created="Mon, 10 Sep 2012 21:36:20 +0000"  >&lt;p&gt;Keith, PAGEALLOC debug is a more powerful (and slow) kernel feature to track random memory writes. What it does is when you free a page instead of merely writing it with some poison value to check later (what you are hitting, and once hit - no way to know who made the write) it actually unmaps the page from address space, so when the offender comes to do the write, the page is not there and we get the nice oops pointing at the red-handed writer to the place they are not supposed to be writing.&lt;/p&gt;</comment>
                            <comment id="44571" author="keith" created="Tue, 11 Sep 2012 05:04:42 +0000"  >&lt;p&gt;Oleg I am familiar with PAGEALLOC. I am just wondering if it is a separate issue or not. I didn&apos;t get a chance to test your patch but I will do it first thing tomorrow. &lt;/p&gt;

&lt;p&gt;On my nodes that can produce the issue, I was able to test 2.2.59 and I don&apos;t see the issue, but LU-998 code should be present in the release. &lt;/p&gt;

&lt;p&gt;I am building on the client nodes now to speed things up. I hope tomorrow I can get things greatly narrowed down if Oleg&apos;s patch is not the root issue. &lt;/p&gt;

&lt;p&gt;Sorry I didn&apos;t get a chance to push Yu&apos;s build errors yet.&lt;/p&gt;</comment>
                            <comment id="44584" author="yujian" created="Tue, 11 Sep 2012 09:07:58 +0000"  >&lt;p&gt;Hi Keith,&lt;br/&gt;
I submitted a patch which contained Oleg&apos;s patch and enabled the kernel debug options to verify whether the issue is resolved or not on master branch: &lt;a href=&quot;http://review.whamcloud.com/3936&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/3936&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="44592" author="green" created="Tue, 11 Sep 2012 10:20:42 +0000"  >&lt;p&gt;Keith, ok. Well, the original issue my patch fixes has been there for a long time; it might have been hidden because nobody was leaving incorrect pointers in the mti_info buf, and then later somebody started to do it and that&apos;s why everything started to fall apart lately.&lt;br/&gt;
Granted I did not try to go back all the way to see if I can reproduce it on just the original commit&lt;/p&gt;</comment>
                            <comment id="44611" author="keith" created="Tue, 11 Sep 2012 17:26:59 +0000"  >&lt;p&gt;I have been running &lt;a href=&quot;http://review.whamcloud.com/3928&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/3928&lt;/a&gt; with Master for a few hours now and I do not seem to be able to reproduce the errors.   It looks to be the fix to me. &lt;/p&gt;

&lt;p&gt;If Yu&apos;s latest build agrees I will cherry pick it to b2_3 and b2_2. &lt;/p&gt;

&lt;p&gt;Thanks for finding this Oleg. &lt;/p&gt;</comment>
                            <comment id="44625" author="green" created="Tue, 11 Sep 2012 19:05:55 +0000"  >&lt;p&gt;Don&apos;t create separate patches for other branches, please. I&apos;ll cherry-pick this patch directly since that part of the code has not changed since landing.&lt;/p&gt;</comment>
                            <comment id="44673" author="yujian" created="Wed, 12 Sep 2012 07:08:34 +0000"  >&lt;blockquote&gt;&lt;p&gt;I submitted a patch which contained Oleg&apos;s patch and enabled the kernel debug options to verify whether the issue is resolved or not on master branch: &lt;a href=&quot;http://review.whamcloud.com/3936&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/3936&lt;/a&gt;.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;sanity test 103 passed without detecting slab corruption issue.&lt;/p&gt;</comment>
                            <comment id="44698" author="keith" created="Wed, 12 Sep 2012 12:27:10 +0000"  >&lt;p&gt;I have almost 24 hours of testing and still no recreation.   &lt;/p&gt;

&lt;p&gt;Oleg I will leave the cherry-pick to you. &lt;/p&gt;
</comment>
                            <comment id="44753" author="pjones" created="Thu, 13 Sep 2012 01:39:22 +0000"  >&lt;p&gt;Landed for 2.3 and 2.4&lt;/p&gt;</comment>
                            <comment id="73021" author="sarah" created="Fri, 6 Dec 2013 22:42:09 +0000"  >&lt;p&gt;review-dne failed as:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;12:47:05:[ 3488.673687] Lustre: 3067:0:(client.c:1903:ptlrpc_expire_one_request()) @@@ Request sent has failed due to network error: [sent 1386362637/real 1386362637]  req@ffff880036623bb8 x1453703367244336/t0(0) o38-&amp;gt;lustre-MDT0002-osp-MDT0000@10.10.16.232@tcp:24/4 lens 400/544 e 0 to 1 dl 1386362662 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1
12:47:06:[ 3488.676610] Lustre: 3067:0:(client.c:1903:ptlrpc_expire_one_request()) Skipped 305 previous similar messages
12:47:06:[ 3673.675126] BUG: unable to handle kernel paging request at ffff880022a6ae30
12:47:06:[ 3673.675968] IP: [&amp;lt;ffffffffa09ef395&amp;gt;] ksocknal_send+0x165/0x450 [ksocklnd]
12:47:06:[ 3673.676583] PGD 1a26063 PUD 1a2a063 PMD 215067 PTE 22a6a060
12:47:06:[ 3673.677103] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
12:47:06:[ 3673.677103] last sysfs file: /sys/devices/system/cpu/possible
12:47:07:[ 3673.677103] CPU 1 
12:47:07:[ 3673.677103] Modules linked in: osp(U) mdd(U) lfsck(U) lod(U) mdt(U) mgs(U) mgc(U) osd_ldiskfs(U) lquota(U) lustre(U) lov(U) osc(U) mdc(U) fid(U) fld(U) ksocklnd(U) ptlrpc(U) obdclass(U) lnet(U) sha512_generic sha256_generic libcfs(U) ldiskfs(U) jbd2 nfsd exportfs autofs4 nfs lockd fscache auth_rpcgss nfs_acl sunrpc ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core microcode virtio_balloon 8139too 8139cp mii i2c_piix4 i2c_core ext3 jbd mbcache virtio_blk virtio_pci virtio_ring virtio pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: speedstep_lib]
12:47:07:[ 3673.677103] 
12:47:07:[ 3673.677103] Pid: 3069, comm: ptlrpcd_1 Not tainted 2.6.32-358.23.2.el6_lustre.g3ddc521.x86_64 #1 Red Hat KVM
12:47:07:[ 3673.677103] RIP: 0010:[&amp;lt;ffffffffa09ef395&amp;gt;]  [&amp;lt;ffffffffa09ef395&amp;gt;] ksocknal_send+0x165/0x450 [ksocklnd]
12:47:07:[ 3673.677103] RSP: 0018:ffff88005f7bf980  EFLAGS: 00010286
12:47:07:[ 3673.677103] RAX: 0000000000000000 RBX: ffff88000eb08ef0 RCX: 0000000000000009
12:47:07:[ 3673.677103] RDX: ffff880002300000 RSI: ffff880002311b20 RDI: ffffffffa09feb38
12:47:07:[ 3673.677103] RBP: ffff88005f7bf9f0 R08: 0000000000000000 R09: 0000000000000000
12:47:07:[ 3673.677103] R10: ffff8800497d37e0 R11: 0000000000000000 R12: ffff880022a6adf0
12:47:07:[ 3673.677103] R13: 0000000000000001 R14: ffff880064cd64e0 R15: 00000000000000e0
12:47:07:[ 3673.677103] FS:  0000000000000000(0000) GS:ffff880002300000(0000) knlGS:0000000000000000
12:47:07:[ 3673.677103] CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
12:47:07:[ 3673.677103] CR2: ffff880022a6ae30 CR3: 0000000001a25000 CR4: 00000000000006e0
12:47:07:[ 3673.677103] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
12:47:07:[ 3673.677103] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
12:47:07:[ 3673.677103] Process ptlrpcd_1 (pid: 3069, threadinfo ffff88005f7be000, task ffff88005c46c340)
12:47:07:[ 3673.677103] Stack:
12:47:07:[ 3673.677103]  ffff880015f79c01 0000000122a6adf0 0000000000000000 ffff880052054df0
12:47:07:[ 3673.677103] &amp;lt;d&amp;gt; 0000000000000000 000000e800000000 000200000a0a10ed 0000000000003039
12:47:07:[ 3673.677103] &amp;lt;d&amp;gt; ffff88005f7bfa10 ffff880052054df0 ffff880022a6adf0 ffff880022a6adf0
12:47:07:[ 3673.677103] Call Trace:
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa0532dfb&amp;gt;] lnet_ni_send+0x4b/0xf0 [lnet]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa0537005&amp;gt;] lnet_send+0x655/0xb80 [lnet]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa053806a&amp;gt;] LNetPut+0x31a/0x860 [lnet]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa0808dc0&amp;gt;] ptl_send_buf+0x1e0/0x550 [ptlrpc]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffff8103b82c&amp;gt;] ? kvm_clock_read+0x1c/0x20
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa080c2bd&amp;gt;] ptl_send_rpc+0x4dd/0xcc0 [ptlrpc]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa0800994&amp;gt;] ptlrpc_send_new_req+0x454/0x7c0 [ptlrpc]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa0804728&amp;gt;] ptlrpc_check_set+0x898/0x1da0 [ptlrpc]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa083061b&amp;gt;] ptlrpcd_check+0x55b/0x590 [ptlrpc]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa0830bdd&amp;gt;] ptlrpcd+0x2ad/0x3f0 [ptlrpc]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffff8105bca0&amp;gt;] ? default_wake_function+0x0/0x20
12:47:08:[ 3673.677103]  [&amp;lt;ffffffffa0830930&amp;gt;] ? ptlrpcd+0x0/0x3f0 [ptlrpc]
12:47:08:[ 3673.677103]  [&amp;lt;ffffffff81095696&amp;gt;] kthread+0x96/0xa0
12:47:08:[ 3673.677103]  [&amp;lt;ffffffff8100c10a&amp;gt;] child_rip+0xa/0x20
12:47:08:[ 3673.677103]  [&amp;lt;ffffffff81095600&amp;gt;] ? kthread+0x0/0xa0
12:47:09:[ 3673.677103]  [&amp;lt;ffffffff8100c100&amp;gt;] ? child_rip+0x0/0x20
12:47:09:[ 3673.677103] Code: 00 c7 43 60 c1 00 00 00 48 89 de 48 c7 43 70 00 00 00 00 48 c7 43 68 00 00 00 00 48 8b 55 c0 8b 4d c8 48 8b 7d a8 e8 7b fa ff ff &amp;lt;41&amp;gt; f6 44 24 40 08 74 1b 8b 4d b4 85 c9 0f 84 c8 00 00 00 65 48 
12:47:09:[ 3673.677103] RIP  [&amp;lt;ffffffffa09ef395&amp;gt;] ksocknal_send+0x165/0x450 [ksocklnd]
12:47:09:[ 3673.677103]  RSP &amp;lt;ffff88005f7bf980&amp;gt;
12:47:09:[ 3673.677103] CR2: ffff880022a6ae30
12:47:09:[    0.000000] Initializing cgroup subsys cpuset
12:47:09:[    0.000000] Initializing cgroup subsys cpu
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="13620">LU-1235</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="15864">LU-1877</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="15712">LU-1844</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="11838" name="mdt-serial.log" size="914961" author="keith" created="Sun, 9 Sep 2012 17:23:14 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv4cf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4237</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>