Details
-
Bug
-
Resolution: Cannot Reproduce
-
Critical
-
None
-
Lustre 2.5.5
-
None
-
kernel-2.6.32-642.3.1.1chaos.1blueos_small_pages.bl2.2.ppc64
lustre-client-2.5.5-9chaos_2.6.32_642.3.1.1chaos.1blueos_small_pages.bl2.2.ppc64.ppc64
-
3
-
9223372036854775807
Description
We have hit crashes like the one below several times on BGQ LAC nodes, and we suspect a stack overrun. Sysadmins report console messages like the following prior to the crash:
Lustre: 4656:0:(lu_object.c:1695:keys_init()) maximum lustre stack 12448
<0>LustreError: 858:0:(osc_dev.c:125:osc_key_init()) ASSERTION( (!(((((gfp_t)0x10u) | ((gfp_t)0x40u))) != (((gfp_t)0x20u))) || (!(((current_thread_info()->preempt_count) & ((((1UL << (10))-1) << ((0 + 8) + 8)) | (((1UL << (8))-1) << (0 + 8)) | (((1UL << (1))-1) << (((0 + 8) + 8) + 10))))))) ) failed: <0>LustreError: 858:0:(osc_dev.c:125:osc_key_init()) LBUG <0>Kernel panic - not syncing: LBUG in interrupt. <0> <4>Call Trace: <4>[c000000083cb8ac0] [c0000000000139d4] .show_stack+0x74/0x1c0 (unreliable) <4>[c000000083cb8b70] [c0000000005f6544] .panic+0xc4/0x20c <4>[c000000083cb8c00] [d000000004dcb4a4] .lbug_with_loc+0xa4/0xc0 [libcfs] <4>[c000000083cb8c90] [d000000006bb90ec] .osc_key_init+0x1ec/0x340 [osc] <4>[c000000083cb8d50] [d000000006546024] .keys_fill+0x174/0x2b0 [obdclass] <4>[c000000083cb8e30] [d0000000065480b4] .lu_context_init+0x104/0x400 [obdclass] <4>[c000000083cb8ef0] [d0000000065483d0] .lu_env_init+0x20/0x50 [obdclass] <4>[c000000083cb8f70] [d000000006555220] .cl_env_new+0x100/0x500 [obdclass] <4>[c000000083cb9050] [d000000006555868] .cl_env_get+0x88/0x230 [obdclass] <4>[c000000083cb9100] [d000000006555a9c] .cl_env_nested_get+0x8c/0xf0 [obdclass] <4>[c000000083cb9190] [d0000000070fe68c] .ll_releasepage+0xbc/0x200 [lustre] <4>[c000000083cb9240] [c000000000154418] .try_to_release_page+0x68/0xa0 <4>[c000000083cb92b0] [c000000000179258] .shrink_page_list.clone.2+0x718/0x800 <4>[c000000083cb9470] [c0000000001796c4] .shrink_inactive_list+0x384/0x8c0 <4>[c000000083cb9660] [c000000000179ef0] .shrink_mem_cgroup_zone+0x2f0/0x5d0 <4>[c000000083cb9770] [c00000000017a2a8] .shrink_zone+0xd8/0x220 <4>[c000000083cb9880] [c00000000017b178] .zone_reclaim+0x388/0x650 <4>[c000000083cb9a40] [c00000000016eefc] .get_page_from_freelist+0x85c/0xa20 <4>[c000000083cb9be0] [c00000000016f47c] .__alloc_pages_nodemask+0x15c/0x980 <4>[c000000083cb9d90] [c0000000001b796c] .kmem_getpages+0x7c/0x1a0 <4>[c000000083cb9e20] [c0000000001b857c] .cache_grow+0x34c/0x370 <4>[c000000083cb9ee0] 
[c0000000001b887c] .cache_alloc_refill+0x2dc/0x320 <4>[c000000083cb9fc0] [c0000000001b9ef8] .__kmalloc+0x258/0x280 <4>[c000000083cba080] [d000000004f122f0] .LNetMDBind+0xe0/0x770 [lnet] <4>[c000000083cba160] [d000000006922b30] .ptl_send_buf+0x1a0/0x8a0 [ptlrpc] <4>[c000000083cba2b0] [d000000006926560] .ptl_send_rpc+0x760/0x1020 [ptlrpc] <4>[c000000083cba3e0] [d00000000690f7e8] .ptlrpc_send_new_req+0x508/0xc00 [ptlrpc] <4>[c000000083cba4c0] [d00000000691cd30] .ptlrpc_set_wait+0x7d0/0xda0 [ptlrpc] <4>[c000000083cba630] [d00000000691db7c] .ptlrpc_queue_wait+0xcc/0x370 [ptlrpc] <4>[c000000083cba6f0] [d000000006e7ddc0] .mdc_close+0x280/0x1150 [mdc] <4>[c000000083cba7e0] [d000000007334a48] .lmv_close+0x248/0x8f0 [lmv] <4>[c000000083cba8d0] [d00000000708fedc] .ll_close_inode_openhandle+0x3ec/0x1a70 [lustre] <4>[c000000083cba9f0] [d000000007091790] .ll_md_real_close+0x230/0x310 [lustre] <4>[c000000083cbaaa0] [d0000000070e65dc] .ll_md_blocking_ast+0x4ac/0xbc0 [lustre] <4>[c000000083cbac00] [d0000000068cb4f0] .ldlm_cancel_callback+0xa0/0x310 [ptlrpc] <4>[c000000083cbaca0] [d0000000068e91a8] .ldlm_cli_cancel_local+0xd8/0x7d0 [ptlrpc] <4>[c000000083cbad60] [d0000000068eae3c] .ldlm_cli_cancel_list_local+0x14c/0x410 [ptlrpc] <4>[c000000083cbae80] [d0000000068eee78] .ldlm_prep_elc_req+0x2f8/0x5e0 [ptlrpc] <4>[c000000083cbaf70] [d000000006e8d184] .mdc_intent_getattr_pack+0x124/0x4a0 [mdc] <4>[c000000083cbb040] [d000000006e924a0] .mdc_enqueue+0xc10/0x24b0 [mdc] <4>[c000000083cbb210] [d000000006e93fc0] .mdc_intent_lock+0x280/0x7b4 [mdc] <4>[c000000083cbb370] [d00000000733d204] .lmv_intent_lookup+0x274/0xab0 [lmv] <4>[c000000083cbb490] [d00000000733eb18] .lmv_intent_lock+0x388/0x4b0 [lmv] <4>[c000000083cbb590] [d0000000070e4e20] .ll_lookup_it+0x390/0xee0 [lustre] <4>[c000000083cbb700] [d0000000070e5a40] .ll_lookup_nd+0xd0/0x670 [lustre] <4>[c000000083cbb7c0] [c0000000001dc724] .do_lookup+0x254/0x2d0 <4>[c000000083cbb890] [c0000000001e03ec] .__link_path_walk+0x9dc/0x15b0 
<4>[c000000083cbb9e0] [c0000000001e1378] .path_walk+0x98/0x180 <4>[c000000083cbba80] [c0000000001e169c] .filename_lookup+0x8c/0x100 <4>[c000000083cbbb20] [c0000000001e2810] .user_path_at+0x60/0xb0 <4>[c000000083cbbc70] [c0000000001d4bf4] .vfs_fstatat+0x64/0x110 <4>[c000000083cbbd30] [c0000000001d4ed4] .SyS_newstat+0x24/0x50 <4>[c000000083cbbe30] [c000000000008564] syscall_exit+0x0/0x40