[LU-3224] GPF in dput()/d_kill(), in "ldlm_bl_xx" command, while umount command is running Created: 25/Apr/13 Updated: 23/Jul/13 Resolved: 23/Jul/13 |
|
| Status: | Resolved |
| Project: | Lustre |
| Component/s: | None |
| Affects Version/s: | Lustre 2.1.3 |
| Fix Version/s: | None |
| Type: | Bug | Priority: | Major |
| Reporter: | Patrick Valentin (Inactive) | Assignee: | Bruno Faccini (Inactive) |
| Resolution: | Cannot Reproduce | Votes: | 0 |
| Labels: | None | ||
| Attachments: |
|
| Severity: | 3 |
| Rank (Obsolete): | 7883 |
| Description |
|
The support team reported the following GPF on a Lustre 2.1.3 client, on the CEA demonstrator cluster. Below are their debug traces and comments.
-------------------------------
On the demonstrator cluster we hit a crash of a Lustre client, and my analysis demonstrates a race on the dentry shrink path during umount: a race condition between shrink_dcache_for_umount_subtree() (umount) and dput()/d_kill() (ldlm_bl). The general protection fault happens in d_kill() because RDX holds LIST_POISON1.

crash> bt 26203
PID: 26203 TASK: ffff88033d5dc040 CPU: 7 COMMAND: "ldlm_bl_44"
#0 [ffff8802e12e7ab0] machine_kexec at ffffffff8102895b
#1 [ffff8802e12e7b10] crash_kexec at ffffffff810a4622
#2 [ffff8802e12e7be0] oops_end at ffffffff814886c0
#3 [ffff8802e12e7c10] die at ffffffff8100714b
#4 [ffff8802e12e7c40] do_general_protection at ffffffff81488252
#5 [ffff8802e12e7c70] general_protection at ffffffff81487a25
[exception RIP: d_kill+53]
RIP: ffffffff81179f05 RSP: ffff8802e12e7d20 RFLAGS: 00010282
RAX: dead000000200200 RBX: ffff8805d305a180 RCX: ffff8805d305a1c0
RDX: dead000000100100 RSI: dead000000200200 RDI: dead000000100100
RBP: ffff8802e12e7d30 R8: 00000000deadbeef R9: 5a5a5a5a5a5a5a5a
R10: 5a5a5a5a5a5a5a5a R11: 5a5a5a5a5a5a5a5a R12: ffff8805d305a188
R13: ffffffffa0a848a0 R14: ffff88053dd101e0 R15: ffff8804d9652af8
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#6 [ffff8802e12e7d38] dput at ffffffff8117b971
#7 [ffff8802e12e7d58] ll_drop_dentry at ffffffffa09eb626 [lustre]
#8 [ffff8802e12e7d98] ll_md_blocking_ast at ffffffffa0a2c702 [lustre]
#9 [ffff8802e12e7e08] ldlm_cancel_callback at ffffffffa05f01f0 [ptlrpc]
#10 [ffff8802e12e7e28] ldlm_cli_cancel_local at ffffffffa060a20b [ptlrpc]
#11 [ffff8802e12e7e58] ldlm_cli_cancel_list_local at ffffffffa060cafd [ptlrpc]
#12 [ffff8802e12e7eb8] ldlm_bl_thread_main at ffffffffa06114f0 [ptlrpc]
#13 [ffff8802e12e7f48] kernel_thread at ffffffff8100412a
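For reference, the poison values visible in the registers are exactly what the kernel's list_del() writes into a list entry after unlinking it, so that a second, racing removal faults immediately instead of silently corrupting its neighbours. Below is a minimal userspace sketch of that behaviour; the poison constants match include/linux/poison.h, but the list handling is simplified and is not the verbatim kernel code:

#include <stdio.h>

/* Poison constants as in include/linux/poison.h (64-bit, POISON_POINTER_DELTA = 0). */
#define LIST_POISON1 ((struct list_head *)0xdead000000100100UL)
#define LIST_POISON2 ((struct list_head *)0xdead000000200200UL)

struct list_head { struct list_head *next, *prev; };

/* Simplified list_del(): unlink the entry, then poison its pointers. */
static void list_del(struct list_head *entry)
{
    struct list_head *next = entry->next;  /* mov 0x50(%rdi),%rdx in the disassembly below */
    struct list_head *prev = entry->prev;  /* mov 0x58(%rdi),%rax */
    next->prev = prev;                     /* mov %rax,0x8(%rdx): faults when next == LIST_POISON1 */
    prev->next = next;
    entry->next = LIST_POISON1;
    entry->prev = LIST_POISON2;
}

int main(void)
{
    struct list_head node;
    struct list_head head = { &node, &node };
    node.next = node.prev = &head;

    list_del(&node);   /* first removal succeeds and poisons node */
    printf("next=%p prev=%p\n", (void *)node.next, (void *)node.prev);
    list_del(&node);   /* second removal dereferences LIST_POISON1 and crashes */
    return 0;
}

The second list_del() dies on the next->prev store, which is the same store that faulted at d_kill+53 in the disassembly below.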
d_kill() code where the GPF occurred:

0xffffffff81179edc <d_kill+12>: mov %r12,0x8(%rsp)
0xffffffff81179ee1 <d_kill+17>: nopl 0x0(%rax,%rax,1)
0xffffffff81179ee6 <d_kill+22>: mov 0x50(%rdi),%rdx    <--- rdi = 0xffff8805d305a180 = rbx; loads struct dentry.d_u.d_child.next
0xffffffff81179eea <d_kill+26>: mov 0x58(%rdi),%rax    <--- loads struct dentry.d_u.d_child.prev
0xffffffff81179eee <d_kill+30>: mov $0xdead000000200200,%rsi    LIST_POISON2
0xffffffff81179ef8 <d_kill+40>: mov %rdi,%rbx    rbx = the dentry pointer (struct dentry *)
0xffffffff81179efb <d_kill+43>: mov $0xdead000000100100,%rdi    LIST_POISON1
0xffffffff81179f05 <d_kill+53>: mov %rax,0x8(%rdx)    <--- general protection: RDX = dead000000100100 (LIST_POISON1), RAX = dead000000200200 (LIST_POISON2)
0xffffffff81179f09 <d_kill+57>: mov %rdx,(%rax)
0xffffffff81179f0c <d_kill+60>: mov %rdi,0x50(%rbx)

Corresponding dentry structure:
crash> struct dentry ffff8805d305a180
struct dentry {
d_count = {
counter = 0
},
d_flags = 24,
d_lock = {
raw_lock = {
slock = 458758
}
},
d_mounted = 0,
d_inode = 0x0,
d_hash = {
next = 0x0,
pprev = 0xdead000000200200
},
d_parent = 0xffff88053dd10180,
d_name = {
hash = 3302437006,
len = 24,
name = 0xffff8805d305a220 "[0x2000abc6b:0x10bb:0x0]"
},
d_lru = {
next = 0xffff8805d305a1c0,
prev = 0xffff8805d305a1c0
},
d_u = {
d_child = {
next = 0xffff8803d633e710,
prev = 0xffffffff8117a770
},
d_rcu = {
next = 0xffff8803d633e710, <--- the dentry ffff8805d305a180 was put on the rcu queue
func = 0xffffffff8117a770 <d_callback>
}
},
d_subdirs = {
next = 0xffff8805d305a1e0,
prev = 0xffff8805d305a1e0
},
d_alias = {
next = 0xffff8805d305a1f0,
prev = 0xffff8805d305a1f0
},
d_time = 0,
d_op = 0xffffffffa0a64140,
d_sb = 0xffff88033cd03000,
d_fsdata = 0xdeadbeef,
d_iname = "[0x2000abc6b:0x10bb:0x0]\000\000\000\000\000\000\000"
}
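The overlap visible in d_u above comes from the 2.6.32-era layout of struct dentry, where d_child and d_rcu share storage in a union: once d_free() hands the dentry to call_rcu(), the words that held d_child.next/prev are reinterpreted as the RCU callback, which is why d_rcu.func above points at d_callback. A trimmed sketch of that layout, showing only the relevant members with simplified types:

/* Trimmed from the 2.6.32 struct dentry (include/linux/dcache.h); other fields elided. */
struct list_head { struct list_head *next, *prev; };
struct rcu_head  { struct rcu_head *next; void (*func)(struct rcu_head *head); };

struct dentry_sketch {
    /* ... */
    union {
        struct list_head d_child;  /* sibling linkage while the dentry is live */
        struct rcu_head  d_rcu;    /* reused once d_free() calls call_rcu() */
    } d_u;
    /* ... */
};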
On THIS dentry's d_u pointers the "poison" values have now disappeared, and the dentry sits on the RCU list, to be freed at the next quiescent period.

crash> bt 10428
PID: 10428 TASK: ffff88033d2c1850 CPU: 0 COMMAND: "umount"
#0 [ffff880028207e90] crash_nmi_callback at ffffffff81020b56
#1 [ffff880028207ea0] notifier_call_chain at ffffffff8148a755
#2 [ffff880028207ee0] atomic_notifier_call_chain at ffffffff8148a7ba
#3 [ffff880028207ef0] notify_die at ffffffff810817ce
#4 [ffff880028207f20] do_nmi at ffffffff814883d3
#5 [ffff880028207f50] nmi at ffffffff81487ce0
[exception RIP: ldlm_res_hop_fid_hash+25]
RIP: ffffffffa05f43b9 RSP: ffff880120b2bb98 RFLAGS: 00000292
RAX: ffffffffa06acda0 RBX: ffff8805f7cbc300 RCX: 0000000000000010
RDX: 000000000000ffff RSI: ffff880120b2bcc8 RDI: ffff8805f7cbc300
RBP: ffff880120b2bbd8 R8: ffff880120b2bc48 R9: 5a5a5a5a5a5a5a5a
R10: 5a5a5a5a5a5a5a5a R11: 00000002000abc55 R12: 0000000000000010
R13: ffff8805f8216bc0 R14: ffff880120b2bc48 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
--- <NMI exception stack> ---
#6 [ffff880120b2bb98] ldlm_res_hop_fid_hash at ffffffffa05f43b9 [ptlrpc]
#7 [ffff880120b2bbe0] cfs_hash_bd_from_key at ffffffffa03db192 [libcfs]
#8 [ffff880120b2bc10] cfs_hash_bd_get at ffffffffa03db8a5 [libcfs]
#9 [ffff880120b2bc20] ldlm_resource_get at ffffffffa05f7099 [ptlrpc]
#10 [ffff880120b2bc90] ldlm_resource_iterate at ffffffffa0609b1d [ptlrpc]
#11 [ffff880120b2bcc0] mdc_find_cbdata at ffffffffa08a63cc [mdc]
#12 [ffff880120b2bd20] lmv_find_cbdata at ffffffffa0b17501 [lmv]
#13 [ffff880120b2bd80] find_cbdata at ffffffffa09ecdd2 [lustre]
#14 [ffff880120b2bdb0] ll_d_iput at ffffffffa09ed8be [lustre]
#15 [ffff880120b2bdd0] shrink_dcache_for_umount_subtree at ffffffff8117b36d
#16 [ffff880120b2be20] shrink_dcache_for_umount at ffffffff8117b4f6
#17 [ffff880120b2be40] generic_shutdown_super at ffffffff811653ef
#18 [ffff880120b2be60] kill_anon_super at ffffffff81165546
#19 [ffff880120b2be80] lustre_kill_super at ffffffffa04c699a [obdclass]
#20 [ffff880120b2bea0] deactivate_super at ffffffff811664e0
#21 [ffff880120b2bec0] mntput_no_expire at ffffffff811826bf
#22 [ffff880120b2bef0] sys_umount at ffffffff81183188
#23 [ffff880120b2bf80] system_call_fastpath at ffffffff810030f2
RIP: 00007fd4f64ccd67 RSP: 00007fff034b8a38 RFLAGS: 00010202
RAX: 00000000000000a6 RBX: ffffffff810030f2 RCX: 0000000000000010
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007fd4f82f8f80
RBP: 00007fd4f82f8f40 R8: 00007fd4f82f8fa0 R9: 0000000000000000
R10: 00007fff034b8860 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 00007fd4f82f8fe0
ORIG_RAX: 00000000000000a6 CS: 0033 SS: 002b
Dentry pointer in the stack:

#14 [ffff880120b2bdb0] ll_d_iput at ffffffffa09ed8be [lustre]
ffff880120b2bdb8: 0000000000000246 ffff880593ec4600 <-- here ffff880593ec4600 = rbx
ffff880120b2bdc8: ffff880120b2be18 ffffffff8117b36d
#15 [ffff880120b2bdd0] shrink_dcache_for_umount_subtree at ffffffff8117b36d
Corresponding dentry structure:

crash> struct dentry ffff880593ec4600
struct dentry {
d_count = {
counter = 0
},
d_flags = 24,
d_lock = {
raw_lock = {
slock = 327685
}
},
d_mounted = 1768293427,
d_inode = 0x0,
d_hash = {
next = 0x0,
pprev = 0xdead000000200200
},
d_parent = 0xffff88053dd10180,
d_name = {
hash = 302013360,
len = 25,
name = 0xffff880593ec46a0 "[0x2000abc55:0x13886:0x0]"
},
d_lru = {
next = 0xffff880593ec4640,
prev = 0xffff880593ec4640
},
d_u = {
d_child = {
next = 0xdead000000100100, <-- LIST_POISON1
prev = 0xdead000000200200 <-- LIST_POISON2
},
d_rcu = {
next = 0xdead000000100100,
func = 0xdead000000200200
}
},
d_subdirs = {
next = 0xffff880593ec4660,
prev = 0xffff880593ec4660
},
d_alias = {
next = 0xffff880593ec4670,
prev = 0xffff880593ec4670
},
d_time = 18446612139194222114,
d_op = 0xffffffffa0a64140,
d_sb = 0xffff88033cd03000,
d_fsdata = 0xffff8804ed89bf40,
d_iname = "[0x2000abc55:0x13886:0x0]\000\000\t\033&{"
}
CPU 7 runs thread ldlm_bl_44 (pid 26203) in ll_drop_dentry(0xffff8805d305a180); CPU 0 runs thread umount (pid 10428) in ll_d_iput(0xffff880593ec4600). The two dentries share the same d_parent, so we can say that shrink_dcache_for_umount_subtree() had already finished its job on the dentry ffff8805d305a180 that ll_drop_dentry() was trying to remove/drop. If two functions can remove the same dentry at the same time, there is a race between them... Attached you can find my crash analysis trace, a foreach_bt.txt trace, and also the last Lustre trace messages. |
| Comments |
| Comment by Peter Jones [ 25/Apr/13 ] |
|
Bruno, could you please help with this one? Thanks. Peter |
| Comment by Bruno Faccini (Inactive) [ 25/Apr/13 ] |
|
Sure, I wanted to assign it to myself but you were too fast, Peter!! Patrick (and Antoine?), thanks for this very well detailed bug report!! Just want to ask you for some more info to confirm it: what does the crash sub-command "list 0xffff880356ad4c10" report?? A very long list of sub-dirs?? An inconsistency?? |
| Comment by Lustre Bull [ 25/Apr/13 ] |
|
Hi Bruno, |
| Comment by Antoine Percher [ 26/Apr/13 ] |
|
Hi Bruno, |
| Comment by Bruno Faccini (Inactive) [ 02/May/13 ] |
|
Antoine, this scenario:

1. CPU 7, thread ldlm_bl_44 (pid 26203), in ll_drop_dentry(0xffff8805d305a180): holds dcache_lock and does __d_drop(0xffff8805d305a180);
2. CPU 0, thread umount (pid 10428): waits for dcache_lock in order to loop over dentry.d_u.d_child in shrink_dcache_for_umount_subtree();
3. CPU 7, ldlm_bl_44, in ll_drop_dentry(0xffff8805d305a180): unlocks dcache_lock, runs dput(dentry), and waits for dcache_lock again;
4. CPU 0, umount: takes dcache_lock, loops over dentry.d_u.d_child, unlocks dcache_lock, takes 0xffff8805d305a180 as first child, and does list_del(&dentry->d_u.d_child) (putting LIST_POISON1 in 0xffff8805d305a180->d_u.d_child);
5. CPU 7, ldlm_bl_44, in dput(0xffff8805d305a180): gets dcache_lock, does d_kill(0xffff8805d305a180), tries to do list_del(&0xffff8805d305a180->d_u.d_child), and takes a general protection fault due to LIST_POISON1;
6. CPU 0, umount: d_free(dentry) puts the dentry on the RCU queue via call_rcu();

looks possible. In fact, what puzzles me is that this is the first bug report for this suspected locking hole!!... But maybe it is a consequence of the huge directory size?? The "the filesystem itself is no longer permitted to rearrange the dentries in this superblock" comment in shrink_dcache_for_umount() may not hold for some of our callback mechanisms, which believe they are safe to rearrange them using both the dcache_lock and dentry.d_lock protections... |
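To make the window concrete, the numbered scenario above can be replayed deterministically in userspace. What follows is a hedged sketch, not the actual Lustre or kernel source: dcache_lock is modeled by a pthread mutex, both code paths run from a single thread in the problematic order, the simplified list_del() from the earlier sketch is reused, and the comments map each block to the steps above.

#include <pthread.h>

/* Poison constants as in include/linux/poison.h. */
#define LIST_POISON1 ((struct list_head *)0xdead000000100100UL)
#define LIST_POISON2 ((struct list_head *)0xdead000000200200UL)

struct list_head { struct list_head *next, *prev; };

/* Stand-in for the global dcache_lock of 2.6.32 kernels. */
static pthread_mutex_t dcache_lock = PTHREAD_MUTEX_INITIALIZER;

/* Simplified list_del(): unlink, then poison (see the earlier sketch). */
static void list_del(struct list_head *e)
{
    e->next->prev = e->prev;    /* faults when e->next is already LIST_POISON1 */
    e->prev->next = e->next;
    e->next = LIST_POISON1;
    e->prev = LIST_POISON2;
}

int main(void)
{
    struct list_head child;
    struct list_head d_subdirs = { &child, &child };
    child.next = child.prev = &d_subdirs;

    /* Steps 1+3: ldlm_bl_44 holds dcache_lock for __d_drop(), then
     * drops it before dput() runs, opening the window. */
    pthread_mutex_lock(&dcache_lock);
    /* __d_drop(dentry) would unhash the dentry here. */
    pthread_mutex_unlock(&dcache_lock);

    /* Step 4: umount's shrink_dcache_for_umount_subtree() unlinks the
     * child without taking dcache_lock, as the umount path assumes
     * nobody else can touch this superblock's dentries anymore. */
    list_del(&child);

    /* Step 5: dput() -> d_kill() retakes dcache_lock and unlinks the
     * same child again, dereferencing LIST_POISON1: the reported GPF. */
    pthread_mutex_lock(&dcache_lock);
    list_del(&child);
    pthread_mutex_unlock(&dcache_lock);
    return 0;
}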
| Comment by Bruno Faccini (Inactive) [ 02/May/13 ] |
|
Seems that commit 94509cda from change http://review.whamcloud.com/2400 addresses this. BTW, even if quite old, it has only been integrated in 2.1.5. |
| Comment by Antoine Percher [ 03/May/13 ] |
|
Yes, indeed, in 2.1.5 the function ll_drop_negative_dentry() was replaced by |
| Comment by Bruno Faccini (Inactive) [ 07/May/13 ] |
|
So Patrick, what do you think?? Will you try to integrate http://review.whamcloud.com/2400 into your current source branch, or will you upgrade to 2.1.5?? |
| Comment by Sebastien Buisson (Inactive) [ 13/May/13 ] |
|
Hi Bruno, We will take 2.1.5. Thanks a lot! |
| Comment by Bruno Faccini (Inactive) [ 15/Jul/13 ] |
|
Patrick, Sebastien, |
| Comment by Sebastien Buisson (Inactive) [ 23/Jul/13 ] |
|
Sure, this ticket can be marked as resolved, thanks! |
| Comment by Peter Jones [ 23/Jul/13 ] |
|
ok thanks Sebastien |