Uploaded image for project: 'Lustre'
  1. Lustre
  2. LU-1544

lfs catinfo command crashes

    XMLWordPrintable

Details

    • Bug
    • Resolution: Fixed
    • Critical
    • Lustre 2.3.0, Lustre 2.1.4
    • Lustre 2.0.0, Lustre 2.1.0, Lustre 2.2.0, Lustre 2.3.0, Lustre 2.1.1, Lustre 2.1.2
    • None
    • rhel6.2, lustre 2.2.53
    • 3
    • 4550

    Description

      The system crashes when running "lfs catinfo config ." on a lustre client.

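      The handling of OBD_IOC_LLOG_CATINFO in ll_dir_ioctl() expects the obd device pointed to by sbi->ll_md_exp->exp_obd to be an "mdc" device, but it is actually an "lmv" device. As a result, the cl_import pointer obtained through sbi2mdc(sbi) is invalid, which leads to a general protection fault (GPF) in ptlrpc_request_alloc_internal().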

              case OBD_IOC_LLOG_CATINFO: {
                      struct ptlrpc_request *req = NULL;
                      char                  *buf = NULL;
                      char                  *str;
                      int                    len = 0;
      
                      rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
                      if (rc)
                              RETURN(rc);
                      data = (void *)buf;
      
                      if (!data->ioc_inlbuf1) {
                              obd_ioctl_freedata(buf, len);
                              RETURN(-EINVAL);
                      }
      
                      req = ptlrpc_request_alloc(sbi2mdc(sbi)->cl_import,
                                                 &RQF_LLOG_CATINFO);
      

      Here is the information retrieved from the crash dump.

      crash> bt
      PID: 4631   TASK: ffff88005495ab00  CPU: 0   COMMAND: "lfs"
       #0 [ffff88005d347a50] machine_kexec at ffffffff81031fcb
       #1 [ffff88005d347ab0] crash_kexec at ffffffff810b8f72
       #2 [ffff88005d347b80] oops_end at ffffffff814f07f0
       #3 [ffff88005d347bb0] die at ffffffff8100f26b
       #4 [ffff88005d347be0] do_general_protection at ffffffff814f0382
       #5 [ffff88005d347c10] general_protection at ffffffff814efb55
          [exception RIP: ptlrpc_request_alloc_internal+99]
          RIP: ffffffffa051b233  RSP: ffff88005d347cc8  RFLAGS: 00010293
          RAX: 5a5a5a5a5a5a5a5a  RBX: ffff88003ea6d400  RCX: ffff880037de7900
          RDX: 0000000000000000  RSI: 0000000000000000  RDI: 0000000000000000
          RBP: ffff88005d347ce8   R8: 0000000000000000   R9: 0000000000000250
          R10: 0000000000000250  R11: 0000000000000000  R12: 3838666666662d76
          R13: ffffffffa05da9e0  R14: ffff88003ea6d400  R15: 00007fffb82b1120
          ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
       #6 [ffff88005d347cf0] ptlrpc_request_alloc at ffffffffa051b4c3 [ptlrpc]
       #7 [ffff88005d347d00] ll_dir_ioctl at ffffffffa0aae59e [lustre]
       #8 [ffff88005d347e60] vfs_ioctl at ffffffff81189342
       #9 [ffff88005d347ea0] do_vfs_ioctl at ffffffff811894e4
      #10 [ffff88005d347f30] sys_ioctl at ffffffff81189a61
      #11 [ffff88005d347f80] system_call_fastpath at ffffffff8100b0f2
          RIP: 00007faf80fb9847  RSP: 00007fffb82af0f8  RFLAGS: 00010202
          RAX: 0000000000000010  RBX: ffffffff8100b0f2  RCX: 0000000001967050
          RDX: 00007fffb82b1120  RSI: 00000000c00866c4  RDI: 0000000000000003
          RBP: 00007fffb82b33c0   R8: 0000000000000003   R9: 0000000000800000
          R10: 00007fffb82aee80  R11: 0000000000000202  R12: 0000000000000000
          R13: 00007fffb82b3120  R14: 000000000195f030  R15: 00007fffb82b1120
          ORIG_RAX: 0000000000000010  CS: 0033  SS: 002b
      
      crash> mount
          VFSMOUNT         SUPERBLK     TYPE   DEVNAME                      DIRNAME
      ffff88002468ee80 ffff88005d2ed000 lustre 20.1.12.10@tcp:/lustre       /mnt/lustre
      
      crash> p ((struct super_block *)0xffff88005d2ed000)->s_fs_info
      $3 = (void *) 0xffff880054bb7000
      
      crash> p ((struct lustre_sb_info *)0xffff880054bb7000)->lsi_llsbi->ll_md_exp->exp_obd->u.cli
      $4 = {
        cl_sem = {
          count = 1,
          wait_lock = {
            raw_lock = {
              slock = 1421633408
            }
          },
          wait_list = {
            next = 0xffff880054b23a00,
            prev = 0xffff880054b23a00
          }
        },
        cl_target_uuid = {
          uuid = "\240\337!\240\377\377\377\377\001\000\000\000\001\000\001\000@h\274T\000\210\377\377cli-lustre-clilm"
        },
        cl_import = 0x3838666666662d76,   <== invalid content
      
      crash> p ((struct lustre_sb_info *)0xffff880054bb7000)->lsi_llsbi->ll_md_exp->exp_obd
      $5 = (struct obd_device *) 0xffff880054bd0278
      
      crash> struct obd_device 0xffff880054bd0278
      struct obd_device {
        obd_type = 0xffff880037fa08c0,
        obd_magic = 2874988271,
        obd_name = "lustre-clilmv-ffff88005d2ed000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
        obd_uuid = {
          uuid = "51facc6e-cb5b-ab32-55cf-27f8db30245d\000\000\000"
        },
      

      The same issue seems to have been reported in Bugzilla #17289.

      Since the catinfo service has not been maintained and is deprecated, I am going to post a patch that removes the catinfo code, both the client and server parts.

      Attachments

        Activity

          People

            bogl Bob Glossman (Inactive)
            pichong Gregoire Pichon
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: