Details
-
Bug
-
Resolution: Fixed
-
Major
-
Lustre 2.5.2
-
None
-
3
-
15395
Description
While testing 2.5 servers for a possible upcoming test shot I hit this bug while running simul.
<6>[ 3724.190222] mdt00_003 D 0000000000000010 0 15957 2 0x00000000
<4>[ 3724.197292] ffff8807e1f118b8 0000000000000046 0000000000000000 ffffffffa0f5b6eb
<4>[ 3724.205018] ffff8810021cf190 ffff8810021cf138 ffff8808326a0538 ffffffffa0f5b6eb
<4>[ 3724.212759] ffff8807f846fab8 ffff8807e1f11fd8 000000000000fbc8 ffff8807f846fab8
<4>[ 3724.220491] Call Trace:
<4>[ 3724.223032] [<ffffffff8152a6d5>] rwsem_down_failed_common+0x95/0x1d0
<4>[ 3724.229597] [<ffffffffa0d9d3fb>] ? ldiskfs_xattr_trusted_get+0x2b/0x30 [ldiskfs]
<4>[ 3724.237247] [<ffffffff811ae017>] ? generic_getxattr+0x87/0x90
<4>[ 3724.243199] [<ffffffff8152a866>] rwsem_down_read_failed+0x26/0x30
<4>[ 3724.249499] [<ffffffffa0fe8083>] ? lod_xattr_get+0x153/0x420 [lod]
<4>[ 3724.255867] [<ffffffff8128eab4>] call_rwsem_down_read_failed+0x14/0x30
<4>[ 3724.262580] [<ffffffff81529d64>] ? down_read+0x24/0x30
<4>[ 3724.267923] [<ffffffffa0f2569d>] mdt_object_open_lock+0x1ed/0x9d0 [mdt]
<4>[ 3724.274736] [<ffffffffa0f077e0>] ? mdt_attr_get_complex+0x520/0x7f0 [mdt]
<4>[ 3724.281720] [<ffffffffa0f2dcc7>] mdt_reint_open+0x15b7/0x2150 [mdt]
<4>[ 3724.288187] [<ffffffffa05e9f76>] ? upcall_cache_get_entry+0x296/0x880 [libcfs]
<4>[ 3724.295688] [<ffffffffa073fc10>] ? lu_ucred+0x20/0x30 [obdclass]
<4>[ 3724.301900] [<ffffffffa0f16611>] mdt_reint_rec+0x41/0xe0 [mdt]
<4>[ 3724.307916] [<ffffffffa0efbe63>] mdt_reint_internal+0x4c3/0x780 [mdt]
<4>[ 3724.314551] [<ffffffffa0efc3ee>] mdt_intent_reint+0x1ee/0x520 [mdt]
<4>[ 3724.321023] [<ffffffffa0ef9bce>] mdt_intent_policy+0x3ae/0x770 [mdt]
<4>[ 3724.327619] [<ffffffffa085a2e5>] ldlm_lock_enqueue+0x135/0x950 [ptlrpc]
<4>[ 3724.334445] [<ffffffffa0883ccf>]
ldlm_handle_enqueue0+0x50f/0x10c0 [ptlrpc]
<4>[ 3724.334445] [<ffffffffa0883ccf>] ldlm_handle_enqueue0+0x50f/0x10c0 [ptlrpc]
<4>[ 3724.341608] [<ffffffffa0efa096>] mdt_enqueue+0x46/0xe0 [mdt]
<4>[ 3724.347484] [<ffffffffa0efec5a>] mdt_handle_common+0x52a/0x1470 [mdt]
<4>[ 3724.354147] [<ffffffffa0f3b945>] mds_regular_handle+0x15/0x20 [mdt]
<4>[ 3724.360670] [<ffffffffa08b4015>] ptlrpc_server_handle_request+0x385/0xc00 [ptlrpc]
<4>[ 3724.368508] [<ffffffffa05ce4ce>] ? cfs_timer_arm+0xe/0x10 [libcfs]
<4>[ 3724.374864] [<ffffffffa05df3cf>] ? lc_watchdog_touch+0x6f/0x170 [libcfs]
<4>[ 3724.381791] [<ffffffffa08ab699>] ? ptlrpc_wait_event+0xa9/0x2d0 [ptlrpc]
<4>[ 3724.388691] [<ffffffff810546b9>] ? __wake_up_common+0x59/0x90
<4>[ 3724.394666] [<ffffffffa08b537d>] ptlrpc_main+0xaed/0x1920 [ptlrpc]
<4>[ 3724.402340] [<ffffffffa08b4890>] ? ptlrpc_main+0x0/0x1920 [ptlrpc]
<4>[ 3724.408702] [<ffffffff8109ab56>] kthread+0x96/0xa0
<4>[ 3724.413684] [<ffffffff8100c20a>] child_rip+0xa/0x20
<4>[ 3724.418754] [<ffffffff8109aac0>] ? kthread+0x0/0xa0
<4>[ 3724.423841] [<ffffffff8100c200>] ? child_rip+0x0/0x20
crash> bt ffff8807e2630080
PID: 15771 TASK: ffff8807e2630080 CPU: 20 COMMAND: "mdt01_000"
#0 [ffff8807e26817f8] schedule at ffffffff81528020
#1 [ffff8807e26818c0] rwsem_down_failed_common at ffffffff8152a6d5
#2 [ffff8807e2681920] rwsem_down_read_failed at ffffffff8152a866
#3 [ffff8807e2681960] call_rwsem_down_read_failed at ffffffff8128eab4
#4 [ffff8807e26819c8] mdt_object_open_lock at ffffffffa0f2569d [mdt]
#5 [ffff8807e2681a38] mdt_reint_open at ffffffffa0f2dcc7 [mdt]
#6 [ffff8807e2681b18] mdt_reint_rec at ffffffffa0f16611 [mdt]
#7 [ffff8807e2681b38] mdt_reint_internal at ffffffffa0efbe63 [mdt]
#8 [ffff8807e2681b78] mdt_intent_reint at ffffffffa0efc3ee [mdt]
#9 [ffff8807e2681bc8] mdt_intent_policy at ffffffffa0ef9bce [mdt]
#10 [ffff8807e2681c08] ldlm_lock_enqueue at ffffffffa085a2e5 [ptlrpc]
#11 [ffff8807e2681c78] ldlm_handle_enqueue0 at ffffffffa0883ccf [ptlrpc]
#12 [ffff8807e2681ce8] mdt_enqueue at ffffffffa0efa096 [mdt]
#13 [ffff8807e2681d08] mdt_handle_common at ffffffffa0efec5a [mdt]
#14 [ffff8807e2681d58] mds_regular_handle at ffffffffa0f3b945 [mdt]
#15 [ffff8807e2681d68] ptlrpc_server_handle_request at ffffffffa08b4015 [ptlrpc]
#16 [ffff8807e2681e48] ptlrpc_main at ffffffffa08b537d [ptlrpc]
#17 [ffff8807e2681ee8] kthread at ffffffff8109ab56
#18 [ffff8807e2681f48] kernel_thread at ffffffff8100c20a
info
[ 3724.166886] INFO: task mdt00_003:15957 blocked for more than 120 seconds.
[ 3724.173775] Tainted: P --------------- 2.6.32-431.17.1.el6.wc.x86_64 #1
[ 3724.182236] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 3724.190222] mdt00_003 D 0000000000000010 0 15957 2 0x00000000
[ 3724.197292] ffff8807e1f118b8 0000000000000046 0000000000000000 ffffffffa0f5b6eb
[ 3724.205018] ffff8810021cf190 ffff8810021cf138 ffff8808326a0538 ffffffffa0f5b6eb
[ 3724.212759] ffff8807f846fab8 ffff8807e1f11fd8 000000000000fbc8 ffff8807f846fab8
[ 3724.220491] Call Trace:
[ 3724.223032] [<ffffffff8152a6d5>] rwsem_down_failed_common+0x95/0x1d0
[ 3724.229597] [<ffffffffa0d9d3fb>] ? ldiskfs_xattr_trusted_get+0x2b/0x30 [ldiskfs]
[ 3724.237247] [<ffffffff811ae017>] ? generic_getxattr+0x87/0x90
[ 3724.243199] [<ffffffff8152a866>] rwsem_down_read_failed+0x26/0x30
[ 3724.249499] [<ffffffffa0fe8083>] ? lod_xattr_get+0x153/0x420 [lod]
[ 3724.255867] [<ffffffff8128eab4>] call_rwsem_down_read_failed+0x14/0x30
[ 3724.262580] [<ffffffff81529d64>] ? down_read+0x24/0x30
[ 3724.267923] [<ffffffffa0f2569d>] mdt_object_open_lock+0x1ed/0x9d0 [mdt]
[ 3724.274736] [<ffffffffa0f077e0>] ? mdt_attr_get_complex+0x520/0x7f0 [mdt]
[ 3724.281720] [<ffffffffa0f2dcc7>] mdt_reint_open+0x15b7/0x2150 [mdt]
[ 3724.288187] [<ffffffffa05e9f76>] ? upcall_cache_get_entry+0x296/0x880 [libcfs]
[ 3724.295688] [<ffffffffa073fc10>] ? lu_ucred+0x20/0x30 [obdclass]
[ 3724.301900] [<ffffffffa0f16611>] mdt_reint_rec+0x41/0xe0 [mdt]
[ 3724.307916] [<ffffffffa0efbe63>] mdt_reint_internal+0x4c3/0x780 [mdt]
[ 3724.314551] [<ffffffffa0efc3ee>] mdt_intent_reint+0x1ee/0x520 [mdt]
[ 3724.321023] [<ffffffffa0ef9bce>] mdt_intent_policy+0x3ae/0x770 [mdt]
[ 3724.327619] [<ffffffffa085a2e5>] ldlm_lock_enqueue+0x135/0x950 [ptlrpc]
[ 3724.334445] [<ffffffffa0883ccf>] ldlm_handle_enqueue0+0x50f/0x10c0 [ptlrpc]
[ 3724.341608] [<ffffffffa0efa096>] mdt_enqueue+0x46/0xe0 [mdt]
[ 3724.347484] [<ffffffffa0efec5a>] mdt_handle_common+0x52a/0x1470 [mdt]
[ 3724.354147] [<ffffffffa0f3b945>] mds_regular_handle+0x15/0x20 [mdt]
[ 3724.360670] [<ffffffffa08b4015>] ptlrpc_server_handle_request+0x385/0xc00 [ptlrpc]
[ 3724.368508] [<ffffffffa05ce4ce>] ? cfs_timer_arm+0xe/0x10 [libcfs]
[ 3724.374864] [<ffffffffa05df3cf>] ? lc_watchdog_touch+0x6f/0x170 [libcfs]
[ 3724.381791] [<ffffffffa08ab699>] ? ptlrpc_wait_event+0xa9/0x2d0 [ptlrpc]
[ 3724.388691] [<ffffffff810546b9>] ? __wake_up_common+0x59/0x90
[ 3724.394666] [<ffffffffa08b537d>] ptlrpc_main+0xaed/0x1920 [ptlrpc]
[ 3724.402340] [<ffffffffa08b4890>] ? ptlrpc_main+0x0/0x1920 [ptlrpc]
[ 3724.408702] [<ffffffff8109ab56>] kthread+0x96/0xa0
[ 3724.413684] [<ffffffff8100c20a>] child_rip+0xa/0x20
[ 3724.418754] [<ffffffff8109aac0>] ? kthread+0x0/0xa0
[ 3724.423841] [<ffffffff8100c200>] ? child_rip+0x0/0x20