Details
-
Bug
-
Resolution: Fixed
-
Critical
-
Lustre 2.0.0, Lustre 2.1.0, Lustre 2.2.0, Lustre 2.3.0, Lustre 2.1.1, Lustre 2.1.2
-
None
-
rhel6.2, lustre 2.2.53
Description
The system crashes when running "lfs catinfo config ." on a Lustre client.
The handling of OBD_IOC_LLOG_CATINFO in ll_dir_ioctl() expects the obd device pointed to by sbi->ll_md_exp->exp_obd to be an "mdc" device, but it is actually an "lmv" device, which leads to a general protection fault (GPF) in ptlrpc_request_alloc_internal().
case OBD_IOC_LLOG_CATINFO: { struct ptlrpc_request *req = NULL; char *buf = NULL; char *str; int len = 0; rc = obd_ioctl_getdata(&buf, &len, (void *)arg); if (rc) RETURN(rc); data = (void *)buf; if (!data->ioc_inlbuf1) { obd_ioctl_freedata(buf, len); RETURN(-EINVAL); } req = ptlrpc_request_alloc(sbi2mdc(sbi)->cl_import, &RQF_LLOG_CATINFO);
Here is the information retrieved from the crash dump.
crash> bt
PID: 4631 TASK: ffff88005495ab00 CPU: 0 COMMAND: "lfs"
#0 [ffff88005d347a50] machine_kexec at ffffffff81031fcb
#1 [ffff88005d347ab0] crash_kexec at ffffffff810b8f72
#2 [ffff88005d347b80] oops_end at ffffffff814f07f0
#3 [ffff88005d347bb0] die at ffffffff8100f26b
#4 [ffff88005d347be0] do_general_protection at ffffffff814f0382
#5 [ffff88005d347c10] general_protection at ffffffff814efb55
[exception RIP: ptlrpc_request_alloc_internal+99]
RIP: ffffffffa051b233 RSP: ffff88005d347cc8 RFLAGS: 00010293
RAX: 5a5a5a5a5a5a5a5a RBX: ffff88003ea6d400 RCX: ffff880037de7900
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffff88005d347ce8 R8: 0000000000000000 R9: 0000000000000250
R10: 0000000000000250 R11: 0000000000000000 R12: 3838666666662d76
R13: ffffffffa05da9e0 R14: ffff88003ea6d400 R15: 00007fffb82b1120
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#6 [ffff88005d347cf0] ptlrpc_request_alloc at ffffffffa051b4c3 [ptlrpc]
#7 [ffff88005d347d00] ll_dir_ioctl at ffffffffa0aae59e [lustre]
#8 [ffff88005d347e60] vfs_ioctl at ffffffff81189342
#9 [ffff88005d347ea0] do_vfs_ioctl at ffffffff811894e4
#10 [ffff88005d347f30] sys_ioctl at ffffffff81189a61
#11 [ffff88005d347f80] system_call_fastpath at ffffffff8100b0f2
RIP: 00007faf80fb9847 RSP: 00007fffb82af0f8 RFLAGS: 00010202
RAX: 0000000000000010 RBX: ffffffff8100b0f2 RCX: 0000000001967050
RDX: 00007fffb82b1120 RSI: 00000000c00866c4 RDI: 0000000000000003
RBP: 00007fffb82b33c0 R8: 0000000000000003 R9: 0000000000800000
R10: 00007fffb82aee80 R11: 0000000000000202 R12: 0000000000000000
R13: 00007fffb82b3120 R14: 000000000195f030 R15: 00007fffb82b1120
ORIG_RAX: 0000000000000010 CS: 0033 SS: 002b
crash> mount
VFSMOUNT SUPERBLK TYPE DEVNAME DIRNAME
ffff88002468ee80 ffff88005d2ed000 lustre 20.1.12.10@tcp:/lustre /mnt/lustre
crash> p ((struct super_block *)0xffff88005d2ed000)->s_fs_info
$3 = (void *) 0xffff880054bb7000
crash> p ((struct lustre_sb_info *)0xffff880054bb7000)->lsi_llsbi->ll_md_exp->exp_obd->u.cli
$4 = {
cl_sem = {
count = 1,
wait_lock = {
raw_lock = {
slock = 1421633408
}
},
wait_list = {
next = 0xffff880054b23a00,
prev = 0xffff880054b23a00
}
},
cl_target_uuid = {
uuid = "\240\337!\240\377\377\377\377\001\000\000\000\001\000\001\000@h\274T\000\210\377\377cli-lustre-clilm"
},
cl_import = 0x3838666666662d76, <== invalid content
crash> p ((struct lustre_sb_info *)0xffff880054bb7000)->lsi_llsbi->ll_md_exp->exp_obd
$5 = (struct obd_device *) 0xffff880054bd0278
crash> struct obd_device 0xffff880054bd0278
struct obd_device {
obd_type = 0xffff880037fa08c0,
obd_magic = 2874988271,
obd_name = "lustre-clilmv-ffff88005d2ed000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
obd_uuid = {
uuid = "51facc6e-cb5b-ab32-55cf-27f8db30245d\000\000\000"
},
The same issue seems to have been reported in Bugzilla #17289.
Since the catinfo service has not been maintained and is deprecated, I am going to post a patch that removes the catinfo code from both the client and the server.