Details
- Type: Bug
- Resolution: Fixed
- Priority: Blocker
- Fix Version/s: Lustre 2.4.0
- 3
- 6925
Description
I was trying to run some IOR tests using a 2.3.58-based client and am running into what appear to be stack overflow issues. The test is an IOR write using 256 clients, with 16 tasks per client:
$ salloc -N256 -n$((256*16)) -pltest
$ srun -- ior -a POSIX -Cegw -F -o /p/lstest/surya1/lu2139-ior/%d -t 1m -b 8G -i 5
The clients are all x86_64, with the default 8k kernel stack size, which is important to note. Some info on an example crash that I've seen is below.
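For reference, the 8k figure is the kernel's THREAD_SIZE. On 2.6.32-era x86_64 kernels it is defined roughly as follows (paraphrased from arch/x86/include/asm/page_64_types.h; the exact header varies by kernel version):

/* Per-task kernel stack on x86_64: two 4k pages. */
#define THREAD_ORDER 1
#define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)   /* 4096 << 1 = 8192 bytes */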
The console shows this:
LustreError: 7408:0:(osc_dev.c:129:osc_key_init()) ASSERTION( (!(CFS_ALLOC_IO != CFS_ALLOC_ATOMIC) || (!(((current_thread_info()->preempt_count) & ((((1UL << (10))-1) << ((0 + 8) + 8)) | (((1UL << (8))-1) << (0 + 8)) | (((1UL << (1))-1) << (((0 + 8) + 8) + 10))))))) ) failed:
BUG: unable to handle kernel paging request at 00000001811be3e0
IP: [<ffffffff81052994>] update_curr+0x144/0x1f0
PGD 1024273067 PUD 0
Thread overran stack, or stack corrupted
Oops: 0000 [#1] SMP
last sysfs file: /sys/devices/system/cpu/cpu15/cache/index2/shared_cpu_map
CPU 8
Modules linked in: lmv(U) mgc(U) lustre(U) lov(U) osc(U) mdc(U) fid(U) fld(U) ptlrpc(U) obdclass(U) lvfs(U) dm_round_robin scsi_dh_rdac sg sd_mod crc_t10dif zfs(P)(U) zcommon(P)(U) znvpair(P)(U) zavl(P)(U) zunicode(P)(U) spl(U) zlib_deflate acpi_cpufreq freq_table mperf ib_srp scsi_transport_srp scsi_tgt ko2iblnd(U) lnet(U) sha512_generic sha256_generic libcfs(U) ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ib_sa mlx4_ib ib_mad ib_core dm_mirror dm_region_hash dm_log dm_multipath dm_mod vhost_net macvtap macvlan tun kvm wmi sb_edac edac_core i2c_i801 i2c_core ahci iTCO_wdt iTCO_vendor_support ioatdma ipv6 nfs lockd fscache nfs_acl auth_rpcgss sunrpc igb dca mlx4_en mlx4_core [last unloaded: cpufreq_ondemand]
Pid: 7408, comm: ior Tainted: P ---------------- 2.6.32-220.23.1.2chaos.ch5.x86_64 #1 appro gb812x-cn/S2600JF
...
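For what it's worth, the bit masks in that expanded ASSERTION are the standard preempt_count HARDIRQ/SOFTIRQ/NMI masks, so the check in osc_key_init() appears to boil down to the usual "no blocking allocation in interrupt context" rule. My reconstruction of what it expands from (not the literal source):

/* CFS_ALLOC_IO != CFS_ALLOC_ATOMIC is constant-true, so this is
 * effectively LASSERT(!in_interrupt()) for a CFS_ALLOC_IO allocation. */
LASSERT(ergo(CFS_ALLOC_IO != CFS_ALLOC_ATOMIC,
             !(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))));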
The stack reported by crash:
crash> bt
PID: 7408  TASK: ffff881028578aa0  CPU: 8  COMMAND: "ior"
 #0 [ffff88085c4039a0] machine_kexec at ffffffff8103216b
 #1 [ffff88085c403a00] crash_kexec at ffffffff810b8d12
 #2 [ffff88085c403ad0] oops_end at ffffffff814f2c00
 #3 [ffff88085c403b00] no_context at ffffffff810423fb
 #4 [ffff88085c403b50] __bad_area_nosemaphore at ffffffff81042685
 #5 [ffff88085c403ba0] bad_area_nosemaphore at ffffffff81042753
 #6 [ffff88085c403bb0] __do_page_fault at ffffffff81042e0d
 #7 [ffff88085c403cd0] do_page_fault at ffffffff814f4bde
 #8 [ffff88085c403d00] page_fault at ffffffff814f1f95
    [exception RIP: update_curr+324]
    RIP: ffffffff81052994  RSP: ffff88085c403db8  RFLAGS: 00010086
    RAX: ffff881028578aa0  RBX: 000000003feb8e00  RCX: ffff88082ff111c0
    RDX: 0000000000018b88  RSI: 0000000000000000  RDI: ffff881028578ad8
    RBP: ffff88085c403de8   R8: ffffffff8160b665   R9: 000000000000001a
    R10: 0000000000000010  R11: 000000000000001a  R12: ffff88085c416028
    R13: 0000000001a5b3e7  R14: 0000004135410580  R15: ffff881028578aa0
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #9 [ffff88085c403df0] task_tick_fair at ffffffff81052f4b
#10 [ffff88085c403e20] scheduler_tick at ffffffff81056791
#11 [ffff88085c403e60] update_process_times at ffffffff8107c7e2
#12 [ffff88085c403e90] tick_sched_timer at ffffffff810a0976
#13 [ffff88085c403ec0] __run_hrtimer at ffffffff8109580e
#14 [ffff88085c403f10] hrtimer_interrupt at ffffffff81095bb6
#15 [ffff88085c403f90] smp_apic_timer_interrupt at ffffffff814f754b
#16 [ffff88085c403fb0] apic_timer_interrupt at ffffffff8100bc13
--- <IRQ stack> ---
#17 [ffff881022f56178] apic_timer_interrupt at ffffffff8100bc13
    [exception RIP: _spin_unlock_irqrestore+23]
    RIP: ffffffff814f1ad7  RSP: ffff881022f56228  RFLAGS: 00000246
    RAX: 0000000000000002  RBX: ffff881022f56228  RCX: 0000000000001b78
    RDX: ffff880809274400  RSI: 0000000000000246  RDI: 0000000000000246
    RBP: ffffffff8100bc0e   R8: 0000000000000000   R9: ffffffff8163a940
    R10: 0000000000000001  R11: 0000000000000000  R12: ffff880809274400
    R13: ffffffffa0ab8878  R14: 0000000014208bc1  R15: ffff880840021b48
    ORIG_RAX: ffffffffffffff10  CS: 0010  SS: 0018
#18 [ffff881022f56230] cfs_trace_unlock_tcd at ffffffffa034336c [libcfs]
#19 [ffff881022f56250] libcfs_debug_vmsg2 at ffffffffa0354038 [libcfs]
#20 [ffff881022f563b0] libcfs_debug_msg at ffffffffa03545f1 [libcfs]
#21 [ffff881022f56410] osc_key_init at ffffffffa0a90e17 [osc]
#22 [ffff881022f56430] keys_fill at ffffffffa06d8c3f [obdclass]
#23 [ffff881022f56480] lu_context_init at ffffffffa06dcc6b [obdclass]
#24 [ffff881022f564b0] lu_env_init at ffffffffa06dce3e [obdclass]
#25 [ffff881022f564d0] cl_env_new at ffffffffa06e40bd [obdclass]
#26 [ffff881022f56520] cl_env_get at ffffffffa06e4acb [obdclass]
#27 [ffff881022f56560] lov_sub_get at ffffffffa0b3777d [lov]
#28 [ffff881022f565c0] lov_page_subio at ffffffffa0b37c5d [lov]
#29 [ffff881022f565f0] lov_page_own at ffffffffa0b311af [lov]
#30 [ffff881022f56620] cl_page_own0 at ffffffffa06e8b5b [obdclass]
#31 [ffff881022f56670] cl_page_own_try at ffffffffa06e8db3 [obdclass]
#32 [ffff881022f56680] discard_pagevec at ffffffffa0a926a9 [osc]
#33 [ffff881022f566d0] osc_lru_shrink at ffffffffa0a935d9 [osc]
#34 [ffff881022f567c0] osc_lru_del at ffffffffa0a94aa6 [osc]
#35 [ffff881022f56840] osc_page_delete at ffffffffa0a951b4 [osc]
#36 [ffff881022f56870] cl_page_delete0 at ffffffffa06e99e5 [obdclass]
#37 [ffff881022f568b0] cl_page_delete at ffffffffa06e9e62 [obdclass]
#38 [ffff881022f568d0] ll_releasepage at ffffffffa0bfe41b [lustre]
#39 [ffff881022f56920] try_to_release_page at ffffffff81110070
#40 [ffff881022f56930] shrink_page_list.clone.0 at ffffffff8112a501
#41 [ffff881022f56a60] shrink_inactive_list at ffffffff8112a8cb
#42 [ffff881022f56c10] shrink_zone at ffffffff8112b5df
#43 [ffff881022f56cc0] zone_reclaim at ffffffff8112c384
#44 [ffff881022f56da0] get_page_from_freelist at ffffffff81122834
#45 [ffff881022f56ec0] __alloc_pages_nodemask at ffffffff81123ab1
#46 [ffff881022f56fe0] kmem_getpages at ffffffff8115e2f2
#47 [ffff881022f57010] cache_grow at ffffffff8115e95f
#48 [ffff881022f57080] cache_alloc_refill at ffffffff8115ebb2
#49 [ffff881022f570f0] __kmalloc at ffffffff8115f8d9
#50 [ffff881022f57140] cfs_alloc at ffffffffa0344c40 [libcfs]
#51 [ffff881022f57170] ptlrpc_request_alloc_internal at ffffffffa085d407 [ptlrpc]
#52 [ffff881022f571a0] ptlrpc_request_alloc_pool at ffffffffa085d66e [ptlrpc]
#53 [ffff881022f571b0] osc_brw_prep_request at ffffffffa0a8451b [osc]
#54 [ffff881022f57290] osc_build_rpc at ffffffffa0a8a513 [osc]
#55 [ffff881022f573f0] osc_io_unplug0 at ffffffffa0aa642d [osc]
#56 [ffff881022f57500] osc_io_unplug at ffffffffa0aa7ce1 [osc]
#57 [ffff881022f57510] osc_enter_cache at ffffffffa0aa8473 [osc]
#58 [ffff881022f576a0] osc_queue_async_io at ffffffffa0aae916 [osc]
#59 [ffff881022f57830] osc_page_cache_add at ffffffffa0a94fc9 [osc]
#60 [ffff881022f57870] cl_page_cache_add at ffffffffa06e61d7 [obdclass]
#61 [ffff881022f57910] lov_page_cache_add at ffffffffa0b31325 [lov]
#62 [ffff881022f57940] cl_page_cache_add at ffffffffa06e61d7 [obdclass]
#63 [ffff881022f579e0] vvp_io_commit_write at ffffffffa0c1161d [lustre]
#64 [ffff881022f57a50] cl_io_commit_write at ffffffffa06f5b1d [obdclass]
#65 [ffff881022f57aa0] ll_commit_write at ffffffffa0be68be [lustre]
#66 [ffff881022f57b00] ll_write_end at ffffffffa0bfe4e0 [lustre]
#67 [ffff881022f57b30] generic_file_buffered_write at ffffffff81111684
#68 [ffff881022f57c00] __generic_file_aio_write at ffffffff81112f70
#69 [ffff881022f57cc0] generic_file_aio_write at ffffffff8111320f
#70 [ffff881022f57d10] vvp_io_write_start at ffffffffa0c11f3c [lustre]
#71 [ffff881022f57d50] cl_io_start at ffffffffa06f244a [obdclass]
#72 [ffff881022f57d80] cl_io_loop at ffffffffa06f6d54 [obdclass]
#73 [ffff881022f57db0] ll_file_io_generic at ffffffffa0bbda7b [lustre]
#74 [ffff881022f57e20] ll_file_aio_write at ffffffffa0bbdce2 [lustre]
#75 [ffff881022f57e80] ll_file_write at ffffffffa0bbeaac [lustre]
#76 [ffff881022f57ef0] vfs_write at ffffffff81177b98
#77 [ffff881022f57f30] sys_write at ffffffff811785a1
#78 [ffff881022f57f80] system_call_fastpath at ffffffff8100b0f2
    RIP: 00002aaaab24748d  RSP: 00007fffffffcb50  RFLAGS: 00010246
    RAX: 0000000000000001  RBX: ffffffff8100b0f2  RCX: 0000081f512ba39b
    RDX: 0000000000100000  RSI: 0000000000821000  RDI: 0000000000000009
    RBP: 0000000000100000   R8: 00000000007ff070   R9: 0000000080000000
    R10: 0000000000101008  R11: 0000000000000293  R12: 00000000007ff070
    R13: 0000000000821000  R14: 0000000004900000  R15: 0000000000000000
    ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b
Doing the math, this stack shows about 7.5k of used space: 0xffff881022f57f80 - 0xffff881022f56230 = 7504 bytes, which is awfully close to 8k. Note also frames #50-#38: a kmalloc from osc_brw_prep_request dropped into direct memory reclaim, which re-entered Lustre via ll_releasepage and the OSC LRU code, stacking a second deep Lustre call chain on top of the first.
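A trivial user-space sketch, just to make that arithmetic explicit (addresses copied from the backtrace above and from the task_struct dump below):

#include <stdio.h>

int main(void)
{
        unsigned long top  = 0xffff881022f57f80UL; /* frame #78, system_call_fastpath */
        unsigned long deep = 0xffff881022f56230UL; /* frame #18, cfs_trace_unlock_tcd */
        unsigned long base = 0xffff881022f56000UL; /* task_struct->stack, dumped below */

        printf("frames #18..#78 span:   %lu bytes\n", top - deep);  /* 7504 */
        printf("offset from stack base: %lu of 8192\n", top - base); /* 8064 */
        return 0;
}

Measured from the stack base rather than from frame #18, the deepest frame sits only 0x230 = 560 bytes above the bottom of the allocation, so there was essentially no headroom left.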
Now let's look at the thread_info structure for that thread:
crash> p *(struct task_struct *)0xffff881028578aa0
...
  stack = 0xffff881022f56000,
...
crash> p *(struct thread_info *)0xffff881022f56000
$5 = {
  task = 0xffffffff814f4cfa,
  exec_domain = 0xffff881022f56028,
  flags = 2167540686,
  status = 4294967295,
  cpu = 1072401920,
  preempt_count = 34824,
  addr_limit = {
    seg = 0
  },
  restart_block = {
    fn = 0xffff881022f56088,
    {
      {
        arg0 = 18446744071582132352,
        arg1 = 18446612201620201608,
        arg2 = 52,
        arg3 = 21474836533
      },
      futex = {
        uaddr = 0xffffffff81322880,
        val = 586506376,
        flags = 4294936592,
        bitset = 52,
        time = 21474836533,
        uaddr2 = 0x100000000000006
      },
      nanosleep = {
        index = -2127419264,
        rmtp = 0xffff881022f56088,
        compat_rmtp = 0x34,
        expires = 21474836533
      },
      poll = {
        ufds = 0xffffffff81322880,
        nfds = 586506376,
        has_timeout = -30704,
        tv_sec = 52,
        tv_nsec = 21474836533
      }
    }
  },
  sysenter_return = 0xffff881022f560f8,
  uaccess_err = -2119203296
}
This structure is definitely trashed and contains bogus information, almost certainly a result of the thread overrunning the default 8k stack on x86_64. Note in particular preempt_count = 34824 (0x8808): that garbage value is presumably what tripped the in_interrupt()-style check in the assertion above.
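That is consistent with how the stack is laid out on these kernels: struct thread_info shares the THREAD_SIZE allocation with the stack itself, sitting at the lowest addresses while the stack grows down toward it, so an overflow scribbles over exactly the fields dumped above. From include/linux/sched.h (2.6.32 era):

union thread_union {
        struct thread_info thread_info;                /* at the base of the allocation */
        unsigned long stack[THREAD_SIZE/sizeof(long)]; /* SP starts at the top          */
};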