[root@neel1024 127.0.0.1-2015.05.22-17:54:34] # cd ../../neel1062/127.0.0.1-2015.06.19-16:50:56 crash --hex /ccc/scratch/cont001/sysdebug/neel1062/127.0.0.1-2015.06.19-16:50:56/vmlinux /ccc/scratch/cont001/sysdebug/neel1062/127.0.0.1-2015.06.19-16:50:56/vmcore crash 7.0.9-4.el7 Copyright (C) 2002-2014 Red Hat, Inc. Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation Copyright (C) 1999-2006 Hewlett-Packard Co Copyright (C) 2005, 2006, 2011, 2012 Fujitsu Limited Copyright (C) 2006, 2007 VA Linux Systems Japan K.K. Copyright (C) 2005, 2011 NEC Corporation Copyright (C) 1999, 2002, 2007 Silicon Graphics, Inc. Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. This program is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Enter "help copying" to see the conditions. This program has absolutely no warranty. Enter "help warranty" for details. GNU gdb (GDB) 7.6 Copyright (C) 2013 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" and "show warranty" for details. This GDB was configured as "x86_64-unknown-linux-gnu"... 
KERNEL: /ccc/scratch/cont001/sysdebug/neel1062/127.0.0.1-2015.06.19-16:50:56/vmlinux DUMPFILE: /ccc/scratch/cont001/sysdebug/neel1062/127.0.0.1-2015.06.19-16:50:56/vmcore [PARTIAL DUMP] CPUS: 64 DATE: Fri Jun 19 16:50:47 2015 UPTIME: 9 days, 04:54:40 LOAD AVERAGE: 3.13, 2.47, 1.06 TASKS: 1111 NODENAME: neel1062 RELEASE: 3.10.0-229.el7.x86_64 VERSION: #1 SMP Thu Jan 29 18:37:38 EST 2015 MACHINE: x86_64 (2294 Mhz) MEMORY: 127.6 GB PANIC: "Kernel panic - not syncing: LBUG" PID: 40201 COMMAND: "testsApiC++-gcc" TASK: ffff880e6f474440 [THREAD_INFO: ffff880eeff90000] CPU: 6 STATE: TASK_RUNNING (PANIC) crash> bt PID: 40201 TASK: ffff880e6f474440 CPU: 6 COMMAND: "testsApiC++-gcc" #0 [ffff880eeff93638] machine_kexec at ffffffff8104c4cb #1 [ffff880eeff93698] crash_kexec at ffffffff810e1fe2 #2 [ffff880eeff93768] panic at ffffffff815fd7e1 #3 [ffff880eeff937e8] lbug_with_loc at ffffffffa0473e5b [libcfs] #4 [ffff880eeff93808] osc_extent_find at ffffffffa0becdf2 [osc] #5 [ffff880eeff93990] osc_queue_async_io at ffffffffa0be4bf0 [osc] #6 [ffff880eeff93ad8] osc_page_cache_add at ffffffffa0bd2463 [osc] #7 [ffff880eeff93b00] osc_io_commit_async at ffffffffa0bd9162 [osc] #8 [ffff880eeff93b60] cl_io_commit_async at ffffffffa06f4007 [obdclass] #9 [ffff880eeff93ba8] lov_io_commit_async at ffffffffa09ecbea [lov] #10 [ffff880eeff93c08] cl_io_commit_async at ffffffffa06f4007 [obdclass] #11 [ffff880eeff93c50] vvp_io_write_commit at ffffffffa0b0007a [lustre] #12 [ffff880eeff93cb0] vvp_io_write_start at ffffffffa0b00aa6 [lustre] #13 [ffff880eeff93d00] cl_io_start at ffffffffa06f3875 [obdclass] #14 [ffff880eeff93d28] cl_io_loop at ffffffffa06f6c95 [obdclass] #15 [ffff880eeff93d58] ll_file_io_generic at ffffffffa0a9f85c [lustre] #16 [ffff880eeff93e60] ll_file_aio_write at ffffffffa0aa00ce [lustre] #17 [ffff880eeff93ea8] ll_file_write at ffffffffa0aa02b2 [lustre] #18 [ffff880eeff93ef8] vfs_write at ffffffff811c65dd #19 [ffff880eeff93f38] sys_write at ffffffff811c7028 #20 [ffff880eeff93f80] 
system_call_fastpath at ffffffff81613da9 RIP: 00007f8d6bbc39fd RSP: 00007fff791cd238 RFLAGS: 00010216 RAX: 0000000000000001 RBX: ffffffff81613da9 RCX: 000000000000003f RDX: 0000000005c00000 RSI: 00007f8bce395038 RDI: 0000000000000020 RBP: 00007f8bce395038 R8: 00000000003ffffe R9: 00000000003ffff4 R10: 00000000003ffff5 R11: 0000000000000293 R12: 0000000005c00000 R13: 0000000005c00000 R14: 0000000006f656c0 R15: 0000000005c00000 ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b crash> 595 /** 596 * Find or create an extent which includes @index, core function to manage 597 * extent tree. 598 */ 599 static struct osc_extent *osc_extent_find(const struct lu_env *env, 600 struct osc_object *obj, pgoff_t index, 601 unsigned int *grants) 602 { 603 struct client_obd *cli = osc_cli(obj); 604 struct osc_lock *olck; 605 struct cl_lock_descr *descr; 606 struct osc_extent *cur; 607 struct osc_extent *ext; 608 struct osc_extent *conflict = NULL; 609 struct osc_extent *found = NULL; 102 #define EXTSTR "[%lu -> %lu/%lu]" 103 #define EXTPARA(ext) (ext)->oe_start, (ext)->oe_end, (ext)->oe_max_end [794898.170269] LustreError: 40201:0:(osc_cache.c:662:osc_extent_find()) ASSERTION( (max_end - cur->oe_start) < max_pages ) failed: [35840 -> 511/511] [794898.170280] LustreError: 40201:0:(osc_cache.c:662:osc_extent_find()) LBUG cur->oe_start = 35840 (ext)->oe_end = 511 (ext)->oe_max_end = 511 0xffffffffa0be4bc4 : je 0xffffffffa0be4e2a 0xffffffffa0be4bca : cmp -0x60(%rbp),%eax 0xffffffffa0be4bcd : ja 0xffffffffa0be5504 0xffffffffa0be4bd3 : mov -0x60(%rbp),%eax 0xffffffffa0be4bd6 : mov -0x58(%rbp),%rdx 0xffffffffa0be4bda : lea -0x4c(%rbp),%rcx 0xffffffffa0be4bde : mov %r15,%rsi 0xffffffffa0be4be1 : mov %r12,%rdi 0xffffffffa0be4be4 : mov %r10,-0x68(%rbp) 0xffffffffa0be4be8 : mov %eax,-0x4c(%rbp) 0xffffffffa0be4beb : callq 0xffffffffa0bec981 crash> dis osc_extent_find 0xffffffffa0bec981 : nopl 0x0(%rax,%rax,1) [FTRACE NOP] 0xffffffffa0bec986 : push %rbp 0xffffffffa0bec987 : mov %rsp,%rbp 
0xffffffffa0bec98a : push %r15 0xffffffffa0bec98c : push %r14 0xffffffffa0bec98e : push %r13 0xffffffffa0bec990 : push %r12 0xffffffffa0bec992 : push %rbx 0xffffffffa0bec993 : sub $0x150,%rsp 0xffffffffa0bec99a : mov 0x8(%rsi),%rax 0xffffffffa0bec99e : mov %rdi,-0xc0(%rbp) 0xffffffffa0bec9a5 : mov $0x1,%edi 0xffffffffa0bec9aa : mov %rdx,-0xc8(%rbp) 0xffffffffa0bec9b1 : mov %rsi,-0xa8(%rbp) ffff880eeff93910: ffff880eeff93968 ffffffffa048a700 ffff880eeff93920: ffff880fe7857000 ffff880d00000014 ffff880eeff93930: ffff880036883648 0000000000008c00 ffff880eeff93940: ffff880a00000000 00000000da996a86 ffff880eeff93950: ffffffffa09feb7f 00000000da996a86 ffff880eeff93960: ffff880db336ef20 ffff881eff598548 ffff880eeff93970: ffff881aaa6e44f8 ffff880fe7e54a48 ffff880eeff93980: ffff880a99eab0b0 ffff880eeff93ad0 ffff880eeff93990: ffffffffa0be4bf0 #5 [ffff880eeff93990] osc_queue_async_io at ffffffffa0be4bf0 [osc] 496 static inline struct osc_device *lu2osc_dev(const struct lu_device *d) 497 { 498 LINVRNT(d->ld_type == &osc_device_type); 499 return container_of0(d, struct osc_device, od_cl.cd_lu_dev); 500 } 501 502 static inline struct obd_export *osc_export(const struct osc_object *obj) 503 { 504 return lu2osc_dev(obj->oo_cl.co_lu.lo_dev)->od_exp; 505 } 506 507 static inline struct client_obd *osc_cli(const struct osc_object *obj) 508 { 509 return &osc_export(obj)->exp_obd->u.cli; 510 } crash> struct osc_object ffff880a99eab0b0 struct osc_object { oo_cl = { co_lu = { lo_header = 0xffff880ec1f66738, lo_dev = 0xffff880fe7843900, lo_ops = 0xffffffffa0bf2580 , lo_linkage = { next = 0xffff880ec1f66778, prev = 0xffff880ec1f667b0 }, lo_dev_ref = {} }, co_ops = 0xffffffffa0bf25c0 , co_slice_off = 0x120 }, oo_oinfo = 0xffff88101f34ac80, oo_contended = 0x0, oo_contention_time = 0x0, oo_inflight = {{ next = 0xffff880a99eab100, prev = 0xffff880a99eab100 }, { next = 0xffff881f0c05bfe0, prev = 0xffff880dc91717e0 }}, oo_seatbelt = { { rlock = { raw_lock = { { head_tail = 0x36e036e, tickets = { 
head = 0x36e, tail = 0x36e } } } } } }, oo_ready_item = { next = 0xffff880a99eab128, prev = 0xffff880a99eab128 }, oo_hp_ready_item = { next = 0xffff880a99eab138, prev = 0xffff880a99eab138 }, oo_write_item = { next = 0xffff880fe7e54b50, prev = 0xffff880fe7e54b50 }, oo_read_item = { next = 0xffff880a99eab158, prev = 0xffff880a99eab158 }, oo_root = { rb_node = 0xffff880a80470930 }, oo_hp_exts = { next = 0xffff880a99eab170, prev = 0xffff880a99eab170 }, oo_urgent_exts = { next = 0xffff880a99eab180, prev = 0xffff880a99eab180 }, oo_rpc_exts = { next = 0xffff880a99eab190, prev = 0xffff880a99eab190 }, oo_reading_exts = { next = 0xffff880a99eab1a0, prev = 0xffff880a99eab1a0 }, oo_nr_reads = { counter = 0x0 }, oo_nr_writes = { counter = 0x0 }, oo_lock = { { rlock = { raw_lock = { { head_tail = 0x6ec06ec, tickets = { head = 0x6ec, tail = 0x6ec } } } } } }, oo_tree = { height = 0x3, gfp_mask = 0x0, rnode = 0xffff880fa1c2f6a1 }, oo_tree_lock = { { rlock = { raw_lock = { { head_tail = 0x40024002, tickets = { head = 0x4002, tail = 0x4002 } } } } } }, oo_npages = 0x2000, oo_ol_spin = { { rlock = { raw_lock = { { head_tail = 0x780078, tickets = { head = 0x78, tail = 0x78 } } } } } }, oo_ol_list = { next = 0xffff880036963a80, prev = 0xffff880fb2051e60 } } crash> p (*(struct osc_object *)0xffff880a99eab0b0).oo_cl $1 = { co_lu = { lo_header = 0xffff880ec1f66738, lo_dev = 0xffff880fe7843900, lo_ops = 0xffffffffa0bf2580 , lo_linkage = { next = 0xffff880ec1f66778, prev = 0xffff880ec1f667b0 }, lo_dev_ref = {} }, co_ops = 0xffffffffa0bf25c0 , co_slice_off = 0x120 } crash> p (*(struct osc_object *)0xffff880a99eab0b0).oo_cl.co_lu.lo_dev $2 = (struct lu_device *) 0xffff880fe7843900 crash> struct osc_device struct osc_device { struct cl_device od_cl; struct obd_export *od_exp; struct osc_stats od_stats; int od_contention_time; int od_lockless_truncate; } SIZE: 0x70 crash> struct cl_device struct cl_device { struct lu_device cd_lu_dev; const struct cl_device_operations *cd_ops; } SIZE: 0x48 
crash> struct osc_device.od_exp 0xffff880fe7843900 od_exp = 0xffff880fe7fd1000 crash> struct obd_export.exp_obd 0xffff880fe7fd1000 exp_obd = 0xffff880fe7e54588 crash> p (*(struct obd_export *)0xffff880fe7fd1000).exp_obd.u.cli $3 = { cl_sem = { count = 0x0, wait_lock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } }, wait_list = { next = 0xffff880fe7e54a58, prev = 0xffff880fe7e54a58 } }, cl_target_uuid = { uuid = "store0-OST01f3_UUID\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" }, cl_import = 0xffff880fe7eda000, cl_conn_count = 0x1, crash> p &(*(struct obd_export *)0xffff880fe7fd1000).exp_obd.u.cli $4 = (struct client_obd *) 0xffff880fe7e54a48 crash> struct client_obd 0xffff880fe7e54a48 struct client_obd { cl_sem = { count = 0x0, wait_lock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } }, wait_list = { next = 0xffff880fe7e54a58, prev = 0xffff880fe7e54a58 } }, cl_target_uuid = { uuid = "store0-OST01f3_UUID\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" }, cl_import = 0xffff880fe7eda000, cl_conn_count = 0x1, cl_default_mds_easize = 0x0, cl_max_mds_easize = 0x30, cl_default_mds_cookiesize = 0x0, cl_max_mds_cookiesize = 0x20, cl_sp_me = LUSTRE_SP_CLI, cl_sp_to = LUSTRE_SP_OST, cl_flvr_mgc = { sf_rpc = 0x0, sf_flags = 0x0, u_rpc = {}, u_bulk = { hash = { hash_alg = 0x0 } } }, cl_dirty_pages = 0x1c6, cl_dirty_max_pages = 0x4000, cl_dirty_transit = 0x0, cl_avail_grant = 0x3a000, cl_lost_grant = 0x0, cl_reserved_grant = 0xf000, cl_cache_waiters = { next = 0xffff880fe7e54af8, prev = 0xffff880fe7e54af8 }, cl_next_shrink_grant = 0x12f74bff1, cl_grant_shrink_list = { next = 0xffff880fe7e54b10, prev = 0xffff880fe7e54b10 }, cl_grant_shrink_interval = 0x4b0, cl_chunkbits = 0xc, cl_extent_tax = 0x0, cl_loi_list_lock = { { rlock = { raw_lock = { { head_tail = 0x7100710, tickets = { head = 0x710, tail = 0x710 } } } } } }, cl_loi_ready_list = { next = 
0xffff880fe7e54b30, prev = 0xffff880fe7e54b30 }, cl_loi_hp_ready_list = { next = 0xffff880fe7e54b40, prev = 0xffff880fe7e54b40 }, cl_loi_write_list = { next = 0xffff880a99eab148, prev = 0xffff880a99eab148 }, cl_loi_read_list = { next = 0xffff880fe7e54b60, prev = 0xffff880fe7e54b60 }, cl_r_in_flight = 0x0, cl_w_in_flight = 0x0, cl_pending_w_pages = { counter = 0x0 }, cl_pending_r_pages = { counter = 0x0 }, cl_max_pages_per_rpc = 0x100, cl_max_rpcs_in_flight = 0x20, cl_read_rpc_hist = { oh_lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, oh_buckets = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} }, cl_write_rpc_hist = { oh_lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, oh_buckets = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} }, cl_read_page_hist = { oh_lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, oh_buckets = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} }, cl_write_page_hist = { oh_lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, oh_buckets = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} }, cl_read_offset_hist = { oh_lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, oh_buckets = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} }, 
cl_write_offset_hist = { oh_lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, oh_buckets = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} }, cl_cache = 0xffff881000c1c0e8, cl_lru_osc = { next = 0xffff880fe6427b78, prev = 0xffff880fe23343d8 }, cl_lru_left = 0xffff881000c1c0f0, cl_lru_busy = { counter = 0x2000 }, cl_lru_in_list = { counter = 0x0 }, cl_unstable_count = { counter = 0x0 }, cl_lru_list = { next = 0xffff880fe7e551f0, prev = 0xffff880fe7e551f0 }, cl_lru_list_lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, cl_lru_shrinkers = { counter = 0x1 }, cl_destroy_in_flight = { counter = 0x0 }, cl_destroy_waitq = { lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, task_list = { next = 0xffff880fe7e55218, prev = 0xffff880fe7e55218 } }, cl_rpc_lock = 0x0, cl_close_lock = 0x0, cl_mgc_mutex = { count = { counter = 0x1 }, wait_lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, wait_list = { next = 0xffff880fe7e55240, prev = 0xffff880fe7e55240 }, owner = 0x0, osq = 0x0 }, cl_mgc_los = 0x0, cl_mgc_configs_dir = 0x0, cl_mgc_refcount = { counter = 0x0 }, cl_mgc_mgsexp = 0x0, cl_checksum = 0x1, cl_supp_cksum_types = 0x6, cl_cksum_type = OBD_CKSUM_ADLER, cl_ar = { ar_rc = 0x0, ar_force_sync = 0x0, ar_min_xid = 0x0 }, cl_qchk_stat = 0x1, cl_seq = 0x0, cl_resends = { counter = 0xa }, cl_writeback_work = 0xffff880fe7899200, cl_lru_work = 0xffff880fe7899500, cl_quota_hash = {0xffff880fe78de240, 0xffff880fe78de300} } crash> crash> struct client_obd.cl_chunkbits 0xffff880fe7e54a48 cl_chunkbits = 0xc crash> 630 LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT); PAGE_CACHE_SHIFT = cli->cl_chunkbits = 12 631 ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT; ppc_bits = 0 632 
chunk_mask = ~((1 << ppc_bits) - 1); chunk_mask = ~0 (all bits set) 633 chunksize = 1 << cli->cl_chunkbits; chunksize = 4096 634 chunk = index >> ppc_bits; chunk = 35840 >> 0 chunk = 35840 crash> x/a 0xffff880eeff93ad0 - 0x58 0xffff880eeff93a78: 0x8c00 crash> eval 0x8c00 hexadecimal: 8c00 (35KB) decimal: 35840 octal: 106000 binary: 0000000000000000000000000000000000000000000000001000110000000000 crash> 635 636 /* align end to rpc edge, rpc size may not be a power 2 integer. */ 637 max_pages = cli->cl_max_pages_per_rpc; max_pages = 0x100 638 LASSERT((max_pages & ~chunk_mask) == 0); 639 max_end = index - (index % max_pages) + max_pages - 1; max_end = 35840 - 0 + 256 - 1 = 36095 640 max_end = min_t(pgoff_t, max_end, descr->cld_end); max_end = min(36095, 0x1ff) = 511 (cld_end = 0x1ff from the write lock descriptor dumped below; 511 < cur->oe_start = 35840, so the unsigned max_end - cur->oe_start wraps and the assertion at line 662 fails) 641 crash> struct lu_env ffff881eff598548 struct lu_env { le_ctx = { lc_tags = 0x10000008, lc_state = LCS_ENTERED, lc_thread = 0x0, lc_value = 0xffff881e7c21e800, lc_remember = { next = 0xffff881eff598560, prev = 0xffff881eff598560 }, lc_version = 0x12, lc_cookie = 0x0 }, le_ses = 0xffff881eff598580 } crash> p/x osc_session_key $5 = { lct_tags = 0x10, lct_init = 0xffffffffa0bcfde0, lct_fini = 0xffffffffa0bcf8d0, lct_exit = 0x0, lct_index = 0xa, lct_used = { counter = 0x6db }, lct_owner = 0xffffffffa0c05ca0, lct_reference = {} } crash> p/x &osc_session_key $6 = 0xffffffffa0bfefe0 crash> struct lu_context 0xffff881eff598580 struct lu_context { lc_tags = 0x10, lc_state = LCS_ENTERED, lc_thread = 0x0, lc_value = 0xffff881e7c21ec00, lc_remember = { next = 0xffff881eff598598, prev = 0xffff881eff598598 }, lc_version = 0x12, lc_cookie = 0x0 } crash> struct lu_context_key 0xffffffffa0bfefe0 struct lu_context_key { lct_tags = 0x10, lct_init = 0xffffffffa0bcfde0 , lct_fini = 0xffffffffa0bcf8d0 , lct_exit = 0x0, lct_index = 0xa, lct_used = { counter = 0x6db }, lct_owner = 0xffffffffa0c05ca0 <__this_module>, lct_reference = {} } crash> p ((struct lu_context *)0xffff881eff598580).lc_value[0xa] $8 = (void *) 0xffff881aaa6e44f8 crash> struct osc_session.os_io 
0xffff881aaa6e44f8 os_io = { oi_cl = { cis_io = 0xffff881008c46800, cis_obj = 0xffff880a99eab0b0, cis_iop = 0xffffffffa0bf2d60 , cis_linkage = { next = 0xffff881008c46818, prev = 0xffff881008c46818 } }, oi_lockless = 0x0, oi_lru_reserved = 0x0, oi_active = 0x0, oi_trunc = 0x0, oi_write_osclock = 0xffff880036883648, oi_info = { oi_policy = { l_extent = { start = 0x0, end = 0x0, gid = 0x0 }, l_flock = { start = 0x0, end = 0x0, owner = 0x0, blocking_owner = 0x0, blocking_export = 0x0, blocking_refs = 0x0, pid = 0x0 }, l_inodebits = { bits = 0x0 } }, oi_flags = 0x0, oi_md = 0x0, oi_oa = 0x0, oi_osfs = 0x0, oi_cb_up = 0x0, oi_capa = 0x0 }, oi_oa = { o_valid = 0x0, o_oi = { { oi = { oi_id = 0x0, oi_seq = 0x0 }, oi_fid = { f_seq = 0x0, f_oid = 0x0, f_ver = 0x0 } } }, o_parent_seq = 0x0, o_size = 0x0, o_mtime = 0x0, o_atime = 0x0, o_ctime = 0x0, o_blocks = 0x0, o_grant = 0x0, o_blksize = 0x0, o_mode = 0x0, o_uid = 0x0, o_gid = 0x0, o_flags = 0x0, o_nlink = 0x0, o_parent_oid = 0x0, o_misc = 0x0, o_ioepoch = 0x0, o_stripe_idx = 0x0, o_parent_ver = 0x0, o_handle = { cookie = 0x0 }, o_lcookie = { lgc_lgl = { lgl_oi = { { oi = { oi_id = 0x0, oi_seq = 0x0 }, oi_fid = { f_seq = 0x0, f_oid = 0x0, f_ver = 0x0 } } }, lgl_ogen = 0x0 }, lgc_subsys = 0x0, lgc_index = 0x0, lgc_padding = 0x0 }, o_uid_h = 0x0, o_gid_h = 0x0, o_data_version = 0x0, o_padding_4 = 0x0, o_padding_5 = 0x0, o_padding_6 = 0x0 }, oi_cbarg = { opc_rpc_sent = 0x0, opc_rc = 0x0, opc_sync = { done = 0x0, wait = { lock = { { rlock = { raw_lock = { { head_tail = 0x0, tickets = { head = 0x0, tail = 0x0 } } } } } }, task_list = { next = 0x0, prev = 0x0 } } } } } crash> p ((struct osc_session *)0xffff881aaa6e44f8).os_io $9 = { oi_cl = { cis_io = 0xffff881008c46800, cis_obj = 0xffff880a99eab0b0, cis_iop = 0xffffffffa0bf2d60 , cis_linkage = { next = 0xffff881008c46818, prev = 0xffff881008c46818 } }, oi_lockless = 0x0, oi_lru_reserved = 0x0, oi_active = 0x0, oi_trunc = 0x0, oi_write_osclock = 0xffff880036883648, oi_info = { 
oi_policy = { l_extent = { start = 0x0, end = 0x0, gid = 0x0 }, l_flock = { start = 0x0, crash> p &((struct osc_session *)0xffff881aaa6e44f8).os_io $10 = (struct osc_io *) 0xffff881aaa6e44f8 crash> struct osc_session struct osc_session { struct osc_io os_io; } SIZE: 0x1a8 crash> struct osc_io.oi_write_osclock 0xffff881aaa6e44f8 oi_write_osclock = 0xffff880036883648 crash> olck = 0xffff880036883648 crash> p ((struct osc_lock *)0xffff880036883648).ols_cl.cls_lock.cll_descr $11 = { cld_obj = 0xffff880ec1f66798, cld_start = 0x0, cld_end = 0x1ff, cld_gid = 0x0, cld_mode = CLM_WRITE, cld_enq_flags = 0x0 } crash> p &((struct osc_lock *)0xffff880036883648).ols_cl.cls_lock.cll_descr $12 = (struct cl_lock_descr *) 0xffff880eb5877cc0 crash> Client Log before the issue : [794853.077788] Lustre: DEBUG MARKER: Fri Jun 19 16:50:01 2015 [794854.251591] Lustre: 4103:0:(client.c:1939:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1434725384/real 1434725392] req@ffff882003d1d700 x1503584900042692/t0(0) o4->store0-OST0253-osc-ffff88201fcb5800@QQ.P.BBO.WZ@o2ib2:6/4 lens 488/448 e 0 to 1 dl 1434725403 ref 2 fl Rpc:X/0/ffffffff rc 0/-1 [794854.284079] Lustre: store0-OST0253-osc-ffff88201fcb5800: Connection to store0-OST0253 (at QQ.P.BBO.WZ@o2ib2) was lost; in progress operations using this service will wait for recovery to complete [794854.303572] Lustre: Skipped 1 previous similar message [794855.303100] Lustre: 4130:0:(client.c:1939:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1434725384/real 1434725393] req@ffff8820082c8300 x1503584900044004/t0(0) o4->store0-OST0045-osc-ffff88201fcb5800@QQ.P.BBO.FB@o2ib2:6/4 lens 488/448 e 0 to 1 dl 1434725403 ref 2 fl Rpc:X/0/ffffffff rc 0/-1 [794855.304069] Lustre: store0-OST00da-osc-ffff88201fcb5800: Connection to store0-OST00da (at QQ.P.BBO.AL@o2ib2) was lost; in progress operations using this service will wait for recovery to complete [794855.304070] Lustre: Skipped 1 
previous similar message [794855.360912] Lustre: 4130:0:(client.c:1939:ptlrpc_expire_one_request()) Skipped 10 previous similar messages [794856.303572] Lustre: 4140:0:(client.c:1939:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1434725384/real 1434725393] req@ffff88200818fb00 x1503584900043636/t0(0) o4->store0-OST0045-osc-ffff88201fcb5800@QQ.P.BBO.FB@o2ib2:6/4 lens 488/448 e 0 to 1 dl 1434725403 ref 2 fl Rpc:X/0/ffffffff rc 0/-1 [794856.305600] Lustre: store0-OST02ce-osc-ffff88201fcb5800: Connection to store0-OST02ce (at QQ.P.BBO.WW@o2ib2) was lost; in progress operations using this service will wait for recovery to complete [794856.305601] Lustre: Skipped 1 previous similar message [794856.361363] Lustre: 4140:0:(client.c:1939:ptlrpc_expire_one_request()) Skipped 34 previous similar messages [794858.306639] Lustre: 4098:0:(client.c:1939:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1434725386/real 1434725395] req@ffff880a96457800 x1503584900052244/t0(0) o4->store0-OST019e-osc-ffff88201fcb5800@QQ.P.BBO.PO@o2ib2:6/4 lens 488/448 e 0 to 1 dl 1434725407 ref 2 fl Rpc:X/0/ffffffff rc 0/-1 [794858.307621] Lustre: store0-OST011c-osc-ffff88201fcb5800: Connection to store0-OST011c (at QQ.P.BBO.AO@o2ib2) was lost; in progress operations using this service will wait for recovery to complete [794858.307622] Lustre: Skipped 11 previous similar messages [794858.364635] Lustre: 4098:0:(client.c:1939:ptlrpc_expire_one_request()) Skipped 48 previous similar messages [794863.310205] Lustre: 4092:0:(client.c:1939:ptlrpc_expire_one_request()) @@@ Request sent has timed out for sent delay: [sent 1434725405/real 0] req@ffff880e6b10bf00 x1503584900100448/t0(0) o8->store0-OST00a9-osc-ffff88201fcb5800@QQ.P.BBO.AP@o2ib2:28/4 lens 400/544 e 0 to 1 dl 1434725412 ref 2 fl Rpc:XN/0/ffffffff rc 0/-1 [794863.342012] Lustre: 4092:0:(client.c:1939:ptlrpc_expire_one_request()) Skipped 4 previous similar messages [794863.502159] 
Lustre: store0-OST0124-osc-ffff88201fcb5800: Connection to store0-OST0124 (at QQ.P.BBO.PP@o2ib2) was lost; in progress operations using this service will wait for recovery to complete [794863.521661] Lustre: Skipped 3 previous similar messages [794873.249416] Lustre: 4113:0:(client.c:1939:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1434725399/real 1434725411] req@ffff880fda15f500 x1503584900096364/t0(0) o4->store0-OST00da-osc-ffff88201fcb5800@QQ.P.BBO.AL@o2ib2:6/4 lens 488/448 e 0 to 1 dl 1434725422 ref 2 fl Rpc:X/0/ffffffff rc 0/-1 [794873.281913] Lustre: 4113:0:(client.c:1939:ptlrpc_expire_one_request()) Skipped 54 previous similar messages [794873.574515] Lustre: store0-OST0277-osc-ffff88201fcb5800: Connection restored to store0-OST0277 (at QQ.P.BBO.WI@o2ib2) [794874.328561] Lustre: store0-OST01b5-osc-ffff88201fcb5800: Connection restored to store0-OST01b5 (at QQ.P.BBO.IA@o2ib2) [794874.340499] Lustre: Skipped 2 previous similar messages [794891.094781] Lustre: 4143:0:(client.c:1939:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1434725388/real 1434725396] req@ffff881e29993300 x1503584900095200/t0(0) o4->store0-OST002b-osc-ffff88201fcb5800@QQ.P.BBO.FP@o2ib2:6/4 lens 488/448 e 1 to 1 dl 1434725439 ref 2 fl Rpc:X/0/ffffffff rc 0/-1 [794891.127268] Lustre: 4143:0:(client.c:1939:ptlrpc_expire_one_request()) Skipped 52 previous similar messages [794894.288763] Lustre: store0-OST0045-osc-ffff88201fcb5800: Connection restored to store0-OST0045 (at QQ.P.BBO.FB@o2ib2) [794894.300700] Lustre: Skipped 2 previous similar messages [794896.511870] Lustre: store0-OST01f3-osc-ffff88201fcb5800: Connection restored to store0-OST01f3 (at QQ.P.BBO.II@o2ib2) [794896.523812] Lustre: Skipped 3 previous similar messages [794898.170214] LustreError: 40197:0:(osc_cache.c:662:osc_extent_find()) ASSERTION( (max_end - cur->oe_start) < max_pages ) failed: [5120 -> 511/511] [794898.170218] LustreError: 
40208:0:(osc_cache.c:662:osc_extent_find()) ASSERTION( (max_end - cur->oe_start) < max_pages ) failed: [25600 -> 511/511] [794898.170220] LustreError: 40208:0:(osc_cache.c:662:osc_extent_find()) LBUG [794898.170221] Pid: 40208, comm: testsApiC++-gcc Server Log during the issue : 1434725415 2015 Jun 19 16:50:15 bigfoot40 kern err kernel LustreError: 7762:0:(ldlm_lib.c:2725:target_bulk_io()) @@@ Reconnect on bulk GET req@ffff88016d217800 x15 04024242801916/t0(0) o4->a14acc0a-5043-3db8-a75f-d54e69bdebaf@JO.BOO.BZP.BO@o2ib3:0/0 lens 488/448 e 0 to 0 dl 1434725423 ref 1 fl Interpret:/0/0 rc 0/0 1434725415 2015 Jun 19 16:50:15 bigfoot40 kern err kernel LustreError: 7762:0:(ldlm_lib.c:2725:target_bulk_io()) Skipped 1 previous similar message 1434725415 2015 Jun 19 16:50:15 bigfoot40 kern warning kernel Lustre: store0-OST022a: Bulk IO write error with a14acc0a-5043-3db8-a75f-d54e69bdebaf (at JO.BOO.BZP.B 0@o2ib3), client will retry: rc -110 1434725415 2015 Jun 19 16:50:15 bigfoot40 kern err kernel LustreError: 3044:0:(ldlm_lib.c:2725:target_bulk_io()) @@@ Reconnect on bulk GET req@ffff880166e98800 x15 04024242812096/t0(0) o4->a14acc0a-5043-3db8-a75f-d54e69bdebaf@JO.BOO.BZP.BO@o2ib3:0/0 lens 488/448 e 0 to 0 dl 1434725425 ref 1 fl Interpret:/0/0 rc 0/0 1434725415 2015 Jun 19 16:50:15 bigfoot40 kern err kernel LustreError: 3044:0:(ldlm_lib.c:2725:target_bulk_io()) Skipped 9 previous similar messages 1434725415 2015 Jun 19 16:50:15 bigfoot40 kern warning kernel Lustre: store0-OST022a: Bulk IO write error with a14acc0a-5043-3db8-a75f-d54e69bdebaf (at JO.BOO.BZP.B 0@o2ib3), client will retry: rc -110 1434725415 2015 Jun 19 16:50:15 bigfoot40 kern warning kernel Lustre: Skipped 9 previous similar messages 1434725416 2015 Jun 19 16:50:16 bigfoot40 kern warning kernel Lustre: store0-OST022e: Client 86b98655-fffd-2f0c-0322-06b00fa9af64 (at JO.BOO.BZP.PW@o2ib3) refused reconnection, still busy with 2 active RPCs 1434725417 2015 Jun 19 16:50:17 bigfoot40 kern err kernel LustreError: 
12819:0:(ldlm_lib.c:2725:target_bulk_io()) @@@ Reconnect on bulk GET req@ffff88016fc71c00 x1503584902840048/t0(0) o4->86b98655-fffd-2f0c-0322-06b00fa9af64@JO.BOO.BZP.PW@o2ib3:0/0 lens 488/448 e 0 to 0 dl 1434725424 ref 1 fl Interpret:/0/0 rc 0/0 1434725417 2015 Jun 19 16:50:17 bigfoot40 kern err kernel LustreError: 12819:0:(ldlm_lib.c:2725:target_bulk_io()) Skipped 4 previous similar messages 1434725417 2015 Jun 19 16:50:17 bigfoot40 kern warning kernel Lustre: store0-OST022e: Bulk IO write error with 86b98655-fffd-2f0c-0322-06b00fa9af64 (at JO.BOO.BZP.PW@o2ib3), client will retry: rc -110 1434725417 2015 Jun 19 16:50:17 bigfoot40 kern warning kernel Lustre: Skipped 5 previous similar messages 1434725417 2015 Jun 19 16:50:17 bigfoot40 kern err kernel LustreError: 12658:0:(ldlm_lib.c:2680:target_bulk_io()) @@@ bulk GET failed: rc -107 req@ffff88016ce0c800 x1503584902842496/t0(0) o4->86b98655-fffd-2f0c-0322-06b00fa9af64@JO.BOO.BZP.PW@o2ib3:0/0 lens 488/448 e 0 to 0 dl 1434725468 ref 1 fl Interpret:/0/0 rc 0/0 1434725417 2015 Jun 19 16:50:17 bigfoot40 kern err kernel LustreError: 12658:0:(ldlm_lib.c:2680:target_bulk_io()) Skipped 1 previous similar message 1434725417 2015 Jun 19 16:50:17 bigfoot40 kern warning kernel Lustre: store0-OST0228: Client c8cbf750-2d0c-cfa9-7186-8038da5cd933 (at JO.BOO.BZP.AA@o2ib3) refused reconnection, still busy with 20 active RPCs #define ETIMEDOUT 110 /* Connection timed out */