Details
-
Bug
-
Resolution: Fixed
-
Blocker
-
None
-
None
-
3
-
5177
Description
Hit this today and Alex advises he have seen it locally too:
[14514.159140] Lustre: 8283:0:(ldlm_lib.c:2163:target_recovery_init()) RECOVERY: service lustre-OST0000, 2 recoverable clients, last_transno 4294967297 [14519.152723] Lustre: lustre-OST0000: Will be in recovery for at least 1:00, or until 2 clients reconnect [14519.155335] Lustre: lustre-OST0000: Recovery over after 0:01, of 2 clients 2 recovered and 0 were evicted. [14519.157522] Lustre: lustre-OST0000-osc-ffff88005cf92bf0: Connection restored to lustre-OST0000 (at 0@lo) [14519.158734] Lustre: 8209:0:(ofd_obd.c:1067:ofd_orphans_destroy()) lustre-OST0000: deleting orphan objects from 36 to 68 [14519.170121] LustreError: 8003:0:(osp_precreate.c:666:osp_precreate_reserve()) ASSERTION( d->opd_pre_last_created >= d->opd_pre_next ) failed: [14519.170600] LustreError: 8003:0:(osp_precreate.c:666:osp_precreate_reserve()) LBUG [14519.170988] Pid: 8003, comm: mdt01_001 [14519.171173] [14519.171173] Call Trace: [14519.171519] [<ffffffffa0b44915>] libcfs_debug_dumpstack+0x55/0x80 [libcfs] [14519.171756] [<ffffffffa0b44f27>] lbug_with_loc+0x47/0xb0 [libcfs] [14519.171981] [<ffffffffa07e8ac0>] osp_precreate_reserve+0x590/0x610 [osp] [14519.172247] [<ffffffffa0ef72dc>] ? lu_object_add_top+0x2c/0x30 [obdclass] [14519.172478] [<ffffffffa07e1750>] ? osp_object_alloc+0x120/0x290 [osp] [14519.172702] [<ffffffffa07e78f4>] osp_declare_object_create+0x154/0x470 [osp] [14519.172939] [<ffffffffa078655d>] lod_qos_declare_object_on+0xed/0x4c0 [lod] [14519.173206] [<ffffffffa0787f61>] lod_alloc_rr.clone.2+0x4c1/0xca0 [lod] [14519.173487] [<ffffffffa0789c17>] lod_qos_prep_create+0xfa7/0x18e4 [lod] [14519.173813] [<ffffffffa064f022>] ? osd_declare_inode_qid+0x1a2/0x270 [osd_ldiskfs] [14519.174249] [<ffffffffa0784a8b>] lod_declare_striped_object+0x14b/0x920 [lod] [14519.174254] [<ffffffffa0785568>] lod_declare_object_create+0x308/0x4f0 [lod] [14519.174261] [<ffffffffa0503c4f>] mdd_declare_object_create_internal+0xaf/0x1d0 [mdd] [14519.174275] [<ffffffffa0514aca>] mdd_create+0x39a/0x1550 [mdd] [14519.174287] [<ffffffffa06c3bc9>] mdt_reint_open+0x1079/0x1860 [mdt] [14519.174292] [<ffffffffa051a86e>] ? md_ucred+0x1e/0x60 [mdd] [14519.174298] [<ffffffffa068e655>] ? mdt_ucred+0x15/0x20 [mdt] [14519.174305] [<ffffffffa06ae0a1>] mdt_reint_rec+0x41/0xe0 [mdt] [14519.174312] [<ffffffffa06a7483>] mdt_reint_internal+0x4e3/0x7e0 [mdt] [14519.174319] [<ffffffffa06a7a4d>] mdt_intent_reint+0x1ed/0x500 [mdt] [14519.174325] [<ffffffffa06a33fe>] mdt_intent_policy+0x38e/0x770 [mdt] [14519.174350] [<ffffffffa130ddda>] ldlm_lock_enqueue+0x2ea/0x890 [ptlrpc] [14519.174369] [<ffffffffa13350d7>] ldlm_handle_enqueue0+0x4e7/0x1010 [ptlrpc] [14519.174376] [<ffffffffa06a3936>] mdt_enqueue+0x46/0x130 [mdt] [14519.174382] [<ffffffffa06971f2>] mdt_handle_common+0x932/0x1740 [mdt] [14519.174388] [<ffffffffa06980d5>] mdt_regular_handle+0x15/0x20 [mdt] [14519.174409] [<ffffffffa1363483>] ptlrpc_server_handle_request+0x463/0xe70 [ptlrpc] [14519.174417] [<ffffffffa0b4566e>] ? cfs_timer_arm+0xe/0x10 [libcfs] [14519.174437] [<ffffffffa135c171>] ? ptlrpc_wait_event+0xb1/0x2a0 [ptlrpc] [14519.174440] [<ffffffff81051f73>] ? __wake_up+0x53/0x70 [14519.174459] [<ffffffffa136601a>] ptlrpc_main+0xb9a/0x1960 [ptlrpc] [14519.174477] [<ffffffffa1365480>] ? ptlrpc_main+0x0/0x1960 [ptlrpc] [14519.174479] [<ffffffff8100c14a>] child_rip+0xa/0x20 [14519.174498] [<ffffffffa1365480>] ? ptlrpc_main+0x0/0x1960 [ptlrpc] [14519.174516] [<ffffffffa1365480>] ? ptlrpc_main+0x0/0x1960 [ptlrpc] [14519.174517] [<ffffffff8100c140>] ? child_rip+0x0/0x20 [14519.174518] [14519.174698] Kernel panic - not syncing: LBUG
I have a crashdump for this instance too.
the printout of the d structure:
crash> p *(struct osp_device *)0xffff880074b287f0
$4 = {
opd_dt_dev = {
dd_lu_dev = {
ld_ref = {
counter = 4
},
ld_type = 0xffffffffa07f6840,
ld_ops = 0xffffffffa07f2a20,
ld_site = 0xffff8800686d8168,
ld_proc_entry = 0x0,
ld_obd = 0xffff880051d30700,
ld_reference = {<No data fields>},
ld_linkage = {
next = 0xffff880074b28820,
prev = 0xffff880074b28820
}
},
dd_ops = 0xffffffffa07f2a40,
dd_txn_callbacks = {
next = 0xffff880074b28838,
prev = 0xffff880074b28838
}
},
opd_index = 0,
opd_storage_exp = 0xffff880074b2abf0,
opd_storage = 0xffff8800686d8000,
opd_last_used_file = 0xffff880066eebef0,
opd_last_used_id = 69,
opd_gap_start = 38,
opd_gap_count = 0,
opd_obd = 0xffff880051d30700,
opd_exp = 0x0,
opd_cluuid = {
uuid = "69dad957-3501-725e-808b-2f3d3f07ad60\000\000\000"
},
opd_connect_data = 0x0,
opd_connects = 1,
opd_proc_entry = 0x0,
opd_stats = 0x0,
opd_new_connection = 0,
opd_got_disconnected = 0,
opd_imp_connected = 1,
opd_imp_active = 1,
opd_imp_seen_connected = -1,
opd_recovery_completed = 1,
opd_pre_lock = {
raw_lock = {
slock = 0
},
magic = 3735899821,
owner_cpu = 4294967295,
owner = 0xffffffffffffffff
},
opd_pre_next = 70,
opd_pre_last_created = 69,
opd_pre_reserved = 0,
opd_pre_thread = {
t_link = {
next = 0x0,
prev = 0x0
},
t_data = 0x0,
t_flags = 8,
t_id = 0,
t_pid = 0,
t_watchdog = 0x0,
t_svcpt = 0x0,
t_ctl_waitq = {
lock = {
raw_lock = {
slock = 0
},
magic = 3735899821,
owner_cpu = 4294967295,
owner = 0xffffffffffffffff
},
task_list = {
next = 0xffff880074b28970,
prev = 0xffff880074b28970
}
},
t_env = 0x0,
t_name = "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
},
opd_pre_waitq = {
lock = {
raw_lock = {
slock = 0
},
magic = 3735899821,
owner_cpu = 4294967295,
owner = 0xffffffffffffffff
},
task_list = {
next = 0xffff880066ef7ed8,
prev = 0xffff880066ef7ed8
}
},
opd_pre_user_waitq = {
lock = {
raw_lock = {
slock = 0
},
magic = 3735899821,
owner_cpu = 4294967295,
owner = 0xffffffffffffffff
},
task_list = {
next = 0xffff880074b289e8,
prev = 0xffff880074b289e8
}
},
opd_pre_status = 0,
opd_pre_grow_count = 32,
opd_pre_min_grow_count = 32,
opd_pre_max_grow_count = 20000,
opd_pre_grow_slow = 0,
opd_syn_lock = {
raw_lock = {
slock = 0
},
magic = 3735899821,
owner_cpu = 4294967295,
owner = 0xffffffffffffffff
},
opd_syn_generation = {
mnt_cnt = 4298516872,
conn_cnt = 4298516872
},
opd_syn_changes = 0,
opd_syn_prev_done = 1,
opd_syn_thread = {
t_link = {
next = 0x0,
prev = 0x0
},
t_data = 0x0,
t_flags = 8,
t_id = 0,
t_pid = 0,
t_watchdog = 0x0,
t_svcpt = 0x0,
t_ctl_waitq = {
lock = {
raw_lock = {
slock = 0
},
magic = 3735899821,
owner_cpu = 4294967295,
owner = 0xffffffffffffffff
},
task_list = {
next = 0xffff880074b28a98,
prev = 0xffff880074b28a98
}
},
t_env = 0x0,
t_name = "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
},
opd_syn_waitq = {
lock = {
raw_lock = {
slock = 0
},
magic = 3735899821,
owner_cpu = 4294967295,
owner = 0xffffffffffffffff
},
task_list = {
next = 0xffff880075fefb08,
prev = 0xffff880075fefb08
}
},
opd_syn_committed_there = {
next = 0xffff880074b28af8,
prev = 0xffff880074b28af8
},
opd_syn_sync_in_progress = 0,
opd_syn_rpc_in_flight = 0,
opd_syn_max_rpc_in_flight = 8,
opd_syn_rpc_in_progress = 0,
opd_syn_max_rpc_in_progress = 4096,
opd_syn_txn_cb = {
dtc_txn_start = 0,
dtc_txn_stop = 0,
dtc_txn_commit = 0,
dtc_cookie = 0x0,
dtc_tag = 0,
dtc_linkage = {
next = 0x0,
prev = 0x0
}
},
opd_syn_last_used_id = 0,
opd_syn_last_committed_id = 0,
opd_syn_last_processed_id = 0,
opd_syn_tracker = 0xffff880066ee8f30,
opd_syn_ontrack = {
next = 0xffff880074b28b78,
prev = 0xffff880074b28b78
},
opd_statfs = {
os_type = 61267,
os_blocks = 46866,
os_bfree = 40461,
os_bavail = 37825,
os_files = 50016,
os_ffree = 49758,
os_fsid = "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
os_bsize = 4096,
os_namelen = 255,
os_maxbytes = 0,
os_state = 0,
os_spare1 = 0,
os_spare2 = 0,
os_spare3 = 0,
os_spare4 = 0,
os_spare5 = 0,
os_spare6 = 0,
os_spare7 = 0,
os_spare8 = 0,
os_spare9 = 0
},
opd_statfs_fresh_till = 4298523335,
opd_statfs_timer = {
entry = {
next = 0xffff88009c9b9050,
prev = 0xffff880098ebfdb0
},
expires = 4298523335,
function = 0xffffffffa07e84d0 <osp_statfs_timer_cb>,
data = 18446612134272075760,
base = 0xffff88009c9b8000,
start_site = 0x0,
start_comm = "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
start_pid = -1
},
opd_statfs_update_in_progress = 0,
opd_statfs_maxage = 5,
opd_symlink = 0xffff880066ee9ef0
}