Uploaded image for project: 'Lustre'
  1. Lustre
  2. LU-5290

Oops in cl_req_completion()

    XMLWordPrintable

Details

    • 3
    • 14758

    Description

      In cl_req_alloc(), if we cannot allocate req->crq_o, then we pass an uninitialized request to cl_req_completion().

      /*
       * Allocate a cl_req together with its crq_o array of nr_objects entries,
       * record the request type, initialize the crq_pages and crq_layers list
       * heads, and hand the request to cl_req_init().
       *
       * Returns the new request on success. Returns ERR_PTR(-ENOMEM) if either
       * allocation fails, or ERR_PTR(result) if cl_req_init() returns non-zero.
       * In every failure case where req itself was allocated, the request is
       * passed to cl_req_completion() before the error pointer is returned.
       */
      struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
                                  enum cl_req_type crt, int nr_objects)
      {
              struct cl_req *req;
      
              LINVRNT(nr_objects > 0);
              ENTRY;
      
              OBD_ALLOC_PTR(req);
              if (req != NULL) {
                      int result;
      
                      OBD_ALLOC(req->crq_o, nr_objects * sizeof req->crq_o[0]);
                      if (req->crq_o != NULL) {
                              req->crq_nrobjs = nr_objects;
                              req->crq_type = crt;
                              /* The list heads are set up only on this branch. */
                              CFS_INIT_LIST_HEAD(&req->crq_pages);
                              CFS_INIT_LIST_HEAD(&req->crq_layers);
                              result = cl_req_init(env, req, page);
                      } else
                              /*
                               * crq_o allocation failed: crq_type, crq_pages and
                               * crq_layers have NOT been initialized on this path.
                               */
                              result = -ENOMEM;
                      if (result != 0) {
                              /*
                               * NOTE(review): reached from the -ENOMEM branch as
                               * well, so cl_req_completion() is handed a request
                               * whose list heads were never initialized. This is
                               * the call site shown in the Oops trace.
                               */
                              cl_req_completion(env, req, result);
                              req = ERR_PTR(result);
                      }
              } else
                      req = ERR_PTR(-ENOMEM);
              RETURN(req);
      }
      

      In cl_req_completion() this will Oops when we try to walk the req->crq_layers list, because its list head was never initialized.

      [ 3063.937684] BUG: unable to handle kernel NULL pointer dereference at (null)
      [ 3063.938591] IP: [<ffffffffa046dd46>] cl_req_completion+0x46/0x4a0 [obdclass]
      [ 3063.940089] PGD 1e25ae067 PUD 1db4a0067 PMD 0
      [ 3063.940089] Oops: 0000 [#1] SMP
      [ 3063.940089] last sysfs file: /sys/devices/system/cpu/online
      [ 3063.940089] CPU 5
      [ 3063.940089] Modules linked in: lustre(U) ofd(U) osp(U) lod(U) ost(U) mdt(U) mdd(U) mgs\
      (U) nodemap(U) osd_ldiskfs(U) ldiskfs(U) exportfs lquota(U) lfsck(U) jbd obdecho(U) mgc(U\
      ) lov(U) osc(U) mdc(U) lmv(U) fid(U) fld(U) ptlrpc(U) obdclass(U) ksocklnd(U) lnet(U) sha\
      512_generic sha256_generic libcfs(U) autofs4 nfs lockd fscache auth_rpcgss nfs_acl sunrpc\
       ipv6 microcode virtio_balloon virtio_net i2c_piix4 i2c_core ext4 jbd2 mbcache virtio_blk\
       virtio_pci virtio_ring virtio pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm\
      _log dm_mod [last unloaded: speedstep_lib]
      [ 3063.949064]
      [ 3063.949064] Pid: 2419, comm: ldlm_bl_01 Not tainted 2.6.32-431.5.1.el6.lustre.x86_64 #\
      1 Bochs Bochs
      [ 3063.949064] RIP: 0010:[<ffffffffa046dd46>]  [<ffffffffa046dd46>] cl_req_completion+0x4\
      6/0x4a0 [obdclass]
      [ 3063.949064] RSP: 0018:ffff8802183937c0  EFLAGS: 00010286
      [ 3063.949064] RAX: 0000000000000000 RBX: ffff8801da8afa88 RCX: 0000000000000000
      [ 3063.949064] RDX: 00000000fffffff4 RSI: fffffffffffffff0 RDI: ffff880215b18390
      [ 3063.949064] RBP: ffff8802183937e0 R08: 0000000000000000 R09: ffff8801de50e420
      [ 3063.949064] R10: 09f911029d74e35b R11: 00000000000000c8 R12: ffff880215b18390
      [ 3063.949064] R13: 00000000fffffff4 R14: ffff8801da8afab8 R15: ffff880206ab2de0
      [ 3063.949064] FS:  0000000000000000(0000) GS:ffff880030200000(0000) knlGS:00000000000000\
      00
      [ 3063.949064] CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
      [ 3063.949064] CR2: 0000000000000000 CR3: 00000001e20bf000 CR4: 00000000000006e0
      [ 3063.949064] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
      [ 3063.949064] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
      [ 3063.949064] Process ldlm_bl_01 (pid: 2419, threadinfo ffff880218392000, task ffff88021\
      80605c0)
      [ 3063.949064] Stack:
      [ 3063.949064]  ffff8801da8afa88 ffff880215b18390 fffffffffffffff4 0000000000000001
      [ 3063.949064] <d> ffff880218393830 ffffffffa046e537 0000000000000001 00000001de9fb988
      [ 3063.949064] <d> ffff8801f15fdb48 0000000000000000 ffff880206ab2de0 ffff8802183938e0
      [ 3063.949064] Call Trace:
      [ 3063.949064]  [<ffffffffa046e537>] cl_req_alloc+0x397/0x470 [obdclass]
      [ 3063.949064]  [<ffffffffa0949c41>] osc_build_rpc+0x3e1/0x1750 [osc]
      [ 3063.949064]  [<ffffffffa0962dad>] ? osc_extent_make_ready+0x70d/0xbf0 [osc]
      [ 3063.949064]  [<ffffffff8105a1c2>] ? __wake_up+0x32/0x70
      [ 3063.949064]  [<ffffffffa0966b08>] osc_io_unplug0+0x1528/0x1f20 [osc]
      [ 3063.949064]  [<ffffffff812aa82d>] ? pointer+0x8d/0x830
      [ 3063.949064]  [<ffffffff812a961c>] ? put_dec+0x10c/0x110
      [ 3063.949064]  [<ffffffff812a990e>] ? number+0x2ee/0x320
      [ 3063.949064]  [<ffffffff812aa82d>] ? pointer+0x8d/0x830
      [ 3063.949064]  [<ffffffffa0969601>] osc_io_unplug+0x11/0x20 [osc]
      [ 3063.949064]  [<ffffffffa096b466>] osc_cache_writeback_range+0xc76/0xfb0 [osc]
      [ 3063.949064]  [<ffffffffa02d5c2d>] ? cfs_hash_rw_lock+0x1d/0x30 [libcfs]
      [ 3063.949064]  [<ffffffffa02d5c2d>] ? cfs_hash_rw_lock+0x1d/0x30 [libcfs]
      [ 3063.949064]  [<ffffffffa0467a55>] ? cl_lock_mutex_get+0x85/0xe0 [obdclass]
      [ 3063.949064]  [<ffffffffa09568a4>] osc_lock_flush+0x84/0x280 [osc]
      [ 3063.949064]  [<ffffffffa0467a55>] ? cl_lock_mutex_get+0x85/0xe0 [obdclass]
      [ 3063.949064]  [<ffffffffa0956b86>] osc_lock_cancel+0xe6/0x1c0 [osc]
      [ 3063.949064]  [<ffffffffa0465ed5>] cl_lock_cancel0+0x75/0x160 [obdclass]
      [ 3063.949064]  [<ffffffffa0466afb>] cl_lock_cancel+0x13b/0x140 [obdclass]
      [ 3063.949064]  [<ffffffffa095817a>] osc_ldlm_blocking_ast+0x13a/0x350 [osc]
      [ 3063.949064]  [<ffffffffa0666150>] ldlm_handle_bl_callback+0x130/0x400 [ptlrpc]
      [ 3063.949064]  [<ffffffffa0668251>] ldlm_bl_thread_main+0x281/0x400 [ptlrpc]
      [ 3063.949064]  [<ffffffff81067bc0>] ? default_wake_function+0x0/0x20
      [ 3063.949064]  [<ffffffffa0667fd0>] ? ldlm_bl_thread_main+0x0/0x400 [ptlrpc]
      [ 3063.949064]  [<ffffffff8109eab6>] kthread+0x96/0xa0
      [ 3063.949064]  [<ffffffff8100c30a>] child_rip+0xa/0x20
      [ 3063.949064]  [<ffffffff81554710>] ? _spin_unlock_irq+0x30/0x40
      [ 3063.949064]  [<ffffffff8100bb10>] ? restore_args+0x0/0x30
      [ 3063.949064]  [<ffffffff8109ea20>] ? kthread+0x0/0xa0
      [ 3063.949064]  [<ffffffff8100c300>] ? child_rip+0x0/0x20
      

      This was found using fault injection.

      Attachments

        Issue Links

          Activity

            People

              jhammond John Hammond
              jhammond John Hammond
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: