Details

    • Type: Bug
    • Resolution: Fixed
    • Priority: Major
    • Fix Version: Lustre 2.1.6
    • Affects Version: Lustre 2.1.3
    • None
    • Environment: Lustre 2.1.3 Bull.2.308
    • Severity: 3
    • 6773

    Description

      While running a tar on a login node, the node hit a general protection fault in osc_send_oap_rpc.

      It looks like a race in which the loi_oap_pages structure passed to osc_send_oap_rpc gets poisoned:

      general protection fault: 0000 [#1] SMP 
      last sysfs file: /sys/devices/pci0000:80/0000:80:02.0/0000:83:00.0/host7/rport-7:0-0/target7:0:0/7:0:0:18/state
      CPU 3 
      Modules linked in: tcp_diag inet_diag iptable_filter ip_tables nfs fscache lmv(U) mgc(U) lustre(U) lov(U) osc(U) mdc(U)
      lquota(U) fid(U) fld(U) ko2iblnd(U) ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) nfsd lockd nfs_acl auth_rpcgss exportfs
      ipmi_devintf ipmi_si ipmi_msghandler sunrpc rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) ib_cm(U)
      ib_sa(U) ipv6 ib_uverbs(U) ib_umad(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ib_mad(U) ib_core(U) dm_mirror dm_region_hash
      dm_log dm_round_robin scsi_dh_rdac dm_multipath dm_mod uinput usbhid hid sg lpfc scsi_transport_fc scsi_tgt sb_edac
      edac_core i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support igb ioatdma dca ext4 mbcache jbd2 ehci_hcd sd_mod crc_t10dif
      ahci megaraid_sas [last unloaded: scsi_wait_scan]
      
      Pid: 30811, comm: tar Not tainted 2.6.32-220.23.1.bl6.Bull.28.8.x86_64 #1 Bull SAS bullx R/X9DRH-7TF/7F/iTF/iF
      RIP: 0010:[<ffffffffa09f8cb1>]  [<ffffffffa09f8cb1>] osc_send_oap_rpc+0x61/0x1b40 [osc]
      RSP: 0018:ffff8805da6f3588  EFLAGS: 00010296
      RAX: 5a5a5a5a5a5a5a42 RBX: ffff8804710c6a80 RCX: 5a5a5a5a5a5a5a5a
      RDX: ffff8804710c6a80 RSI: ffff881067b22648 RDI: ffff8804710c6aa8
      RBP: ffff8805da6f36b8 R08: ffff8804710c6a98 R09: 00000000000057bf
      R10: 0000000000000d9d R11: b000000000000000 R12: ffff881067b22648
      R13: ffff8804710c6a98 R14: ffff8805da6f3668 R15: ffff8804710c6a98
      FS:  00007f77e18f17a0(0000) GS:ffff880028260000(0000) knlGS:0000000000000000
      CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
      CR2: 00000032bcd9b4f8 CR3: 000000083eaac000 CR4: 00000000000406e0
      DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
      DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400
      
      
      
      
      
      0xffffffffa09f8c50 <osc_send_oap_rpc>:  push   %rbp
      0xffffffffa09f8c51 <osc_send_oap_rpc+1>:        mov    %rsp,%rbp
      0xffffffffa09f8c54 <osc_send_oap_rpc+4>:        push   %r15
      0xffffffffa09f8c56 <osc_send_oap_rpc+6>:        push   %r14
      0xffffffffa09f8c58 <osc_send_oap_rpc+8>:        push   %r13
      0xffffffffa09f8c5a <osc_send_oap_rpc+10>:       push   %r12
      0xffffffffa09f8c5c <osc_send_oap_rpc+12>:       push   %rbx
      0xffffffffa09f8c5d <osc_send_oap_rpc+13>:       sub    $0x108,%rsp
      0xffffffffa09f8c64 <osc_send_oap_rpc+20>:       nopl   0x0(%rax,%rax,1)
      0xffffffffa09f8c69 <osc_send_oap_rpc+25>:       testb  $0x1,-0x4f27ec(%rip)        # 0xffffffffa0506484
      0xffffffffa09f8c70 <osc_send_oap_rpc+32>:       lea    -0x50(%rbp),%r14
      0xffffffffa09f8c74 <osc_send_oap_rpc+36>:       mov    %rdi,-0x78(%rbp)
      0xffffffffa09f8c78 <osc_send_oap_rpc+40>:       mov    %rsi,%r12
      0xffffffffa09f8c7b <osc_send_oap_rpc+43>:       mov    %rdx,-0xc0(%rbp)
      0xffffffffa09f8c82 <osc_send_oap_rpc+50>:       mov    %ecx,-0xac(%rbp)
      0xffffffffa09f8c88 <osc_send_oap_rpc+56>:       mov    %r8,%r15
      0xffffffffa09f8c8b <osc_send_oap_rpc+59>:       mov    %r14,-0x50(%rbp)
      0xffffffffa09f8c8f <osc_send_oap_rpc+63>:       mov    %r14,-0x48(%rbp)
      0xffffffffa09f8c93 <osc_send_oap_rpc+67>:       je     0xffffffffa09f8ca2
      0xffffffffa09f8c95 <osc_send_oap_rpc+69>:       testb  $0x8,-0x4f281c(%rip)        # 0xffffffffa0506480
      0xffffffffa09f8c9c <osc_send_oap_rpc+76>:       jne    0xffffffffa09f9500
      0xffffffffa09f8ca2 <osc_send_oap_rpc+82>:       mov    0x10(%r15),%rcx         <= %rcx is loaded from %r15, which
      itself comes from %r8 = 5th argument of osc_send_oap_rpc
      0xffffffffa09f8ca6 <osc_send_oap_rpc+86>:       lea    0x10(%r15),%rdi
      0xffffffffa09f8caa <osc_send_oap_rpc+90>:       lea    -0x18(%rcx),%rax        <= %rax is now
      5a5a5a5a5a5a5a42, i.e. %rcx - 0x18
      0xffffffffa09f8cae <osc_send_oap_rpc+94>:       cmp    %rcx,%rdi
      
      0xffffffffa09f8cb1 <osc_send_oap_rpc+97>:       mov    0x18(%rax),%rdx           <= Crashed here
      
      0xffffffffa09f8cb5 <osc_send_oap_rpc+101>:      je     0xffffffffa09f8d50
      0xffffffffa09f8cbb <osc_send_oap_rpc+107>:      sub    $0x18,%rdx
      0xffffffffa09f8cbf <osc_send_oap_rpc+111>:      xor    %ecx,%ecx
      0xffffffffa09f8cc1 <osc_send_oap_rpc+113>:      jmp    0xffffffffa09f8d10
      0xffffffffa09f8cc3 <osc_send_oap_rpc+115>:      nopl   0x0(%rax,%rax,1)
      
      
      
      static int
      osc_send_oap_rpc(const struct lu_env *env, struct client_obd *cli,
                       struct lov_oinfo *loi,
                       int cmd, struct loi_oap_pages *lop)
      
      %r15 = %r8 = lop (per the x86-64 calling convention, the 5th argument is passed in %r8)
      
      crash> struct loi_oap_pages ffff8804710c6a98
      struct loi_oap_pages {
        lop_pending = {
          next = 0x5a5a5a5a5a5a5a5a, 
          prev = 0x5a5a5a5a5a5a5a5a
        }, 
        lop_urgent = {
          next = 0x5a5a5a5a5a5a5a5a, 
          prev = 0x5a5a5a5a5a5a5a5a
        }, 
        lop_pending_group = {
          next = 0x5a5a5a5a5a5a5a5a, 
          prev = 0x5a5a5a5a5a5a5a5a
        }, 
        lop_num_pending = 1515870810
      }
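
      Every field of the structure holds the libcfs 0x5a poison pattern (lop_num_pending = 1515870810 is just 0x5a5a5a5a in decimal), so lop points at memory that has already been freed and poisoned. The user-space sketch below is only illustrative: the struct layout is an assumption taken from the disassembly above (oap_urgent_item at offset 0x18, as implied by "lea -0x18(%rcx),%rax"), not from the Lustre headers. It shows how the list_entry()/container_of() arithmetic turns the poisoned next pointer into the address seen in %rax at the fault:

      #include <stdio.h>
      #include <stddef.h>

      /* Stand-in types just for the pointer arithmetic; the 0x18 offset of
       * oap_urgent_item is assumed from the disassembly, not taken from the
       * real Lustre struct osc_async_page. */
      struct list_head { struct list_head *next, *prev; };

      #define list_entry(ptr, type, member) \
              ((type *)((char *)(ptr) - offsetof(type, member)))

      struct oap_layout {                          /* hypothetical layout sketch */
              struct list_head oap_pending_item;   /* 0x00..0x0f */
              char             pad[8];             /* 0x10..0x17 */
              struct list_head oap_urgent_item;    /* 0x18..0x27 */
      };

      int main(void)
      {
              /* lop->lop_urgent.next as read into %rcx: pure 0x5a poison. */
              struct list_head *next = (struct list_head *)0x5a5a5a5a5a5a5a5aULL;

              /* list_entry() subtracts the member offset (0x18), which yields the
               * 5a5a5a5a5a5a5a42 seen in %rax; the loop then dereferences
               * oap->oap_urgent_item.next (mov 0x18(%rax),%rdx) through this
               * poisoned pointer and takes the GPF. */
              struct oap_layout *oap = list_entry(next, struct oap_layout,
                                                  oap_urgent_item);
              printf("oap        = %p\n", (void *)oap);

              /* The same poison as a 32-bit counter, matching lop_num_pending. */
              printf("0x5a5a5a5a = %u\n", 0x5a5a5a5aU);
              return 0;
      }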
      
      static int
      osc_send_oap_rpc(const struct lu_env *env, struct client_obd *cli,
                       struct lov_oinfo *loi,
                       int cmd, struct loi_oap_pages *lop)
      {
              struct ptlrpc_request *req;
              obd_count page_count = 0;
              struct osc_async_page *oap = NULL, *tmp;
              struct osc_brw_async_args *aa;
              const struct obd_async_page_ops *ops;
              CFS_LIST_HEAD(rpc_list);
              int srvlock = 0, mem_tight = 0;
              struct cl_object *clob = NULL;
              obd_off starting_offset = OBD_OBJECT_EOF;
              unsigned int ending_offset;
              int starting_page_off = 0;
              ENTRY;
      
        /* ASYNC_HP pages first. At present, when the lock covering the pages
         * is to be canceled, the pages covered by the lock will be sent out
         * with ASYNC_HP. We have to send them out as soon as possible. */
              cfs_list_for_each_entry_safe(oap, tmp, &lop->lop_urgent, oap_urgent_item) {
                                                      ^^^^^^^^^^^^^^^
                                                      == LI_POISON
      
                      if (oap->oap_async_flags & ASYNC_HP)
                              cfs_list_move(&oap->oap_pending_item, &rpc_list);
                      else if (!(oap->oap_brw_flags & OBD_BRW_SYNC))
                              /* only do this for writeback pages. */
                              cfs_list_move_tail(&oap->oap_pending_item, &rpc_list);
                      if (++page_count >= cli->cl_max_pages_per_rpc)
                              break;
              }
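
      For reference, cfs_list_for_each_entry_safe() is a thin wrapper around the usual kernel "safe" list iterator, which prefetches the next element before the loop body runs. The minimal user-space sketch below (hypothetical fake_oap type and hand-built list, not the Lustre structures) shows that iteration pattern on a healthy list; with a poisoned list head as in this crash, the very first "tmp" prefetch, i.e. the read of oap->oap_urgent_item.next corresponding to the mov 0x18(%rax),%rdx above, is already the faulting access.

      #include <stdio.h>
      #include <stddef.h>

      struct list_head { struct list_head *next, *prev; };

      #define list_entry(ptr, type, member) \
              ((type *)((char *)(ptr) - offsetof(type, member)))

      /* Same shape as the kernel's list_for_each_entry_safe(): "pos" is the
       * current entry and "n" is prefetched from pos->member.next BEFORE the
       * body runs, so a poisoned ->next is dereferenced on the very first step. */
      #define list_for_each_entry_safe(pos, n, head, member)                     \
              for (pos = list_entry((head)->next, __typeof__(*pos), member),     \
                   n = list_entry(pos->member.next, __typeof__(*pos), member);   \
                   &pos->member != (head);                                       \
                   pos = n, n = list_entry(n->member.next, __typeof__(*n), member))

      struct fake_oap {                            /* hypothetical element type */
              int              id;
              struct list_head oap_urgent_item;
      };

      int main(void)
      {
              struct list_head lop_urgent;
              struct fake_oap a = { 1, { NULL, NULL } };
              struct fake_oap b = { 2, { NULL, NULL } };
              struct fake_oap *oap, *tmp;

              /* Build a valid 2-element list: lop_urgent -> a -> b -> lop_urgent. */
              lop_urgent.next        = &a.oap_urgent_item;
              lop_urgent.prev        = &b.oap_urgent_item;
              a.oap_urgent_item.next = &b.oap_urgent_item;
              a.oap_urgent_item.prev = &lop_urgent;
              b.oap_urgent_item.next = &lop_urgent;
              b.oap_urgent_item.prev = &a.oap_urgent_item;

              /* On a healthy list this visits a then b. In the crash, the list
               * head itself lived in freed, 0x5a-poisoned memory, so the initial
               * "tmp" prefetch already read through a poisoned pointer. */
              list_for_each_entry_safe(oap, tmp, &lop_urgent, oap_urgent_item)
                      printf("visiting oap %d\n", oap->id);

              return 0;
      }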
      

      I am attaching a file with the output of dmesg, ps, bt, bt -a and foreach bt, in case you need more details.

      Attachments

        1. crash-dump-properties
          856 kB
          Sebastien Piechurski

        Issue Links

          Activity

            [LU-2797] GPF in osc_send_oap_rpc

            Hi Bruno,

            The workload was exercised for a whole night without a crash.
            We can consider the bug as fixed.

            Thanks for the work!

            spiechurski Sebastien Piechurski added a comment

            Hello Seb,
            I spent some time debugging this last crash to understand what kind of deadlock you encountered.
            As you stated, there were hung-process back-traces in the log, and they all had the same stack, like the following:

            INFO: task get_bench.bull:18887 blocked for more than 120 seconds.
            "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
            get_bench.bul D 0000000000000016     0 18887  18828 0x00000080
             ffff88084eaafcc8 0000000000000082 0000000000000001 5a5a5a5a5a5a5a5a
             5a5a5a5a5a5a5a5a ffff88084eaafd48 ffff88084eaafc48 ffffffffa04df92e
             ffff88085ceb43e0 ffff88084eaaffd8 000000000000db00 ffff88085ceb43e0
            Call Trace:
             [<ffffffffa04df92e>] ? cfs_free+0xe/0x10 [libcfs]
             [<ffffffff8117b997>] ? dput+0xc7/0x190
             [<ffffffff81487205>] rwsem_down_failed_common+0x95/0x1f0
             [<ffffffff81487383>] rwsem_down_write_failed+0x23/0x30
             [<ffffffff81262023>] call_rwsem_down_write_failed+0x13/0x20
             [<ffffffff81486832>] ? down_write+0x32/0x40
             [<ffffffffa0b2ee3b>] ll_setattr_raw+0x18b/0xf20 [lustre]
             [<ffffffffa0b2fc2f>] ll_setattr+0x5f/0x100 [lustre]
             [<ffffffff8117f4e8>] notify_change+0x168/0x340
             [<ffffffff81161343>] sys_fchmodat+0xc3/0x100
             [<ffffffff81161398>] sys_chmod+0x18/0x20
             [<ffffffff810030f2>] system_call_fastpath+0x16/0x1b
            

            and they were no longer present at the time of the crash-dump. This seems to indicate that, since they should have been un-interruptible at this point, the inode/ll_inode_info semaphore they were hung on eventually became available; so it was not really a dead-lock, and in any case the Lustre code used there is far from being impacted by either patch.

            BTW, in the crash-dump I also found that 11 "cat" processes had been hung for about 118s, all with the same stack-trace, like the following:

            PID: 17234  TASK: ffff880867c87080  CPU: 2   COMMAND: "cat"
             #0 [ffff88086473f138] schedule at ffffffff81484c05
             #1 [ffff88086473f200] io_schedule at ffffffff814853e3
             #2 [ffff88086473f220] sync_page at ffffffff810fd4ad
             #3 [ffff88086473f230] __wait_on_bit_lock at ffffffff81485c2a
             #4 [ffff88086473f280] __lock_page at ffffffff810fd447
             #5 [ffff88086473f2e0] vvp_page_own at ffffffffa0b6220a [lustre]
             #6 [ffff88086473f300] cl_page_own0 at ffffffffa05f80bb [obdclass]
             #7 [ffff88086473f350] cl_page_own at ffffffffa05f8330 [obdclass]
             #8 [ffff88086473f360] check_and_discard_cb at ffffffffa05fe9ff [obdclass]
             #9 [ffff88086473f3b0] cl_page_gang_lookup at ffffffffa05f9994 [obdclass]
            #10 [ffff88086473f460] cl_lock_page_out at ffffffffa05fb53b [obdclass]
            #11 [ffff88086473f4d0] osc_lock_flush at ffffffffa0a0177f [osc]
            #12 [ffff88086473f520] osc_lock_cancel at ffffffffa0a01819 [osc]
            #13 [ffff88086473f570] cl_lock_cancel0 at ffffffffa05fa1c5 [obdclass]
            #14 [ffff88086473f5a0] cl_lock_cancel at ffffffffa05fafeb [obdclass]
            #15 [ffff88086473f5c0] osc_ldlm_blocking_ast at ffffffffa0a02f7a [osc]
            #16 [ffff88086473f630] ldlm_cancel_callback at ffffffffa07031f0 [ptlrpc]
            #17 [ffff88086473f650] ldlm_cli_cancel_local at ffffffffa071d0cb [ptlrpc]
            #18 [ffff88086473f680] ldlm_cli_cancel_list_local at ffffffffa071f9bd [ptlrpc]
            #19 [ffff88086473f6e0] ldlm_cancel_resource_local at ffffffffa071fc62 [ptlrpc]
            #20 [ffff88086473f750] osc_destroy at ffffffffa09ec44f [osc]
            #21 [ffff88086473f800] lov_destroy at ffffffffa0a52cad [lov]
            #22 [ffff88086473f8f0] ll_objects_destroy at ffffffffa0b447b7 [lustre]
            #23 [ffff88086473fa20] ll_close_inode_openhandle at ffffffffa0b142a1 [lustre]
            #24 [ffff88086473faa0] ll_md_real_close at ffffffffa0b1514a [lustre]
            #25 [ffff88086473fad0] ll_md_close at ffffffffa0b15425 [lustre]
            #26 [ffff88086473fb80] ll_file_release at ffffffffa0b15a35 [lustre]
            #27 [ffff88086473fbd0] __fput at ffffffff81164a85
            #28 [ffff88086473fc20] fput at ffffffff81164bc5
            #29 [ffff88086473fc30] filp_close at ffffffff8116061d
            #30 [ffff88086473fc60] put_files_struct at ffffffff8105732f
            #31 [ffff88086473fcb0] exit_files at ffffffff810573f3
            #32 [ffff88086473fce0] do_exit at ffffffff81059475
            #33 [ffff88086473fd60] do_group_exit at ffffffff81059bc8
            #34 [ffff88086473fd90] get_signal_to_deliver at ffffffff8106eec6
            #35 [ffff88086473fe30] do_signal at ffffffff810022d5
            #36 [ffff88086473ff30] do_notify_resume at ffffffff81002af0
            #37 [ffff88086473ff50] int_signal at ffffffff810033c1
                RIP: 00000032bcad83f0  RSP: 00007fff228120a8  RFLAGS: 00000246
                RAX: fffffffffffffffc  RBX: 0000000000200000  RCX: ffffffffffffffff
                RDX: 0000000000200000  RSI: 00007fbf6c365000  RDI: 0000000000000003
                RBP: 00007fbf6c365000   R8: 00000000ffffffff   R9: 0000000000000000
                R10: 0000000000200fff  R11: 0000000000000246  R12: 0000000000000003
                R13: ffffffffffff8000  R14: 0000000000200000  R15: 00007fbf6c365000
                ORIG_RAX: 0000000000000000  CS: 0033  SS: 002b
            

            and also, hung for about 112s, the Lustre umount process that was launched as part of the shutdown:

            PID: 24287  TASK: ffff8810414317d0  CPU: 17  COMMAND: "umount"
             #0 [ffff88105cd07418] schedule at ffffffff81484c05
             #1 [ffff88105cd074e0] io_schedule at ffffffff814853e3
             #2 [ffff88105cd07500] sync_page at ffffffff810fd4ad
             #3 [ffff88105cd07510] __wait_on_bit_lock at ffffffff81485c2a
             #4 [ffff88105cd07560] __lock_page at ffffffff810fd447
             #5 [ffff88105cd075c0] vvp_page_own at ffffffffa0b6220a [lustre]
             #6 [ffff88105cd075e0] cl_page_own0 at ffffffffa05f80bb [obdclass]
             #7 [ffff88105cd07630] cl_page_own at ffffffffa05f8330 [obdclass]
             #8 [ffff88105cd07640] check_and_discard_cb at ffffffffa05fe9ff [obdclass]
             #9 [ffff88105cd07690] cl_page_gang_lookup at ffffffffa05f9994 [obdclass]
            #10 [ffff88105cd07740] cl_lock_page_out at ffffffffa05fb53b [obdclass]
            #11 [ffff88105cd077b0] osc_lock_flush at ffffffffa0a0177f [osc]
            #12 [ffff88105cd07800] osc_lock_cancel at ffffffffa0a01819 [osc]
            #13 [ffff88105cd07850] cl_lock_cancel0 at ffffffffa05fa1c5 [obdclass]
            #14 [ffff88105cd07880] cl_lock_cancel at ffffffffa05fafeb [obdclass]
            #15 [ffff88105cd078a0] osc_ldlm_blocking_ast at ffffffffa0a02f7a [osc]
            #16 [ffff88105cd07910] ldlm_cancel_callback at ffffffffa07031f0 [ptlrpc]
            #17 [ffff88105cd07930] ldlm_cli_cancel_local at ffffffffa071d0cb [ptlrpc]
            #18 [ffff88105cd07960] ldlm_cli_cancel at ffffffffa0720f78 [ptlrpc]
            #19 [ffff88105cd079a0] cleanup_resource at ffffffffa070ae18 [ptlrpc]
            #20 [ffff88105cd07a00] ldlm_resource_clean at ffffffffa070afda [ptlrpc]
            #21 [ffff88105cd07a20] cfs_hash_for_each_relax at ffffffffa04ef28f [libcfs]
            #22 [ffff88105cd07ab0] cfs_hash_for_each_nolock at ffffffffa04f0caf [libcfs]
            #23 [ffff88105cd07ae0] ldlm_namespace_cleanup at ffffffffa0707a69 [ptlrpc]
            #24 [ffff88105cd07b00] osc_import_event at ffffffffa09f8f02 [osc]
            #25 [ffff88105cd07b60] ptlrpc_invalidate_import at ffffffffa076c0f0 [ptlrpc]
            #26 [ffff88105cd07c20] ptlrpc_set_import_active at ffffffffa074175d [ptlrpc]
            #27 [ffff88105cd07c50] osc_iocontrol at ffffffffa09f45e0 [osc]
            #28 [ffff88105cd07d00] lov_iocontrol at ffffffffa0a5a82f [lov]
            #29 [ffff88105cd07ec0] ll_umount_begin at ffffffffa0b29520 [lustre]
            #30 [ffff88105cd07ef0] sys_umount at ffffffff8118322a
            #31 [ffff88105cd07f80] system_call_fastpath at ffffffff810030f2
                RIP: 00007f52e77837a7  RSP: 00007fffc0f25188  RFLAGS: 00010206
                RAX: 00000000000000a6  RBX: ffffffff810030f2  RCX: 0000000000000010
                RDX: 0000000000000000  RSI: 0000000000000001  RDI: 00007f52e8c75a00
                RBP: 00007f52e8c759e0   R8: 00007f52e8c75a20   R9: 0000000000000001
                R10: 00007fffc0f24fb0  R11: 0000000000000206  R12: 0000000000000000
                R13: 0000000000000000  R14: 0000000000000000  R15: 00007f52e8c75a60
                ORIG_RAX: 00000000000000a6  CS: 0033  SS: 002b
            

            These last stacks seem to indicate that the Lustre page-flushing mechanism was slow (hung?), but again the Lustre code used there is far from being impacted by either patch.

            On the other hand, do you have any news about exposing Change #6089 alone to the production work-load?

            bfaccini Bruno Faccini (Inactive) added a comment

            I am currently uploading the crash dump taken after the deadlock on the version with both patches applied.
            It is on the ftp server in uploads/LU-2797/deadlock-login0-2013-05-13-patches-6089+6200.tar.xz
            You will see the hung-process backtraces in the dmesg, but as the dump was taken after a failed reboot attempt, those processes had already been killed and disappeared.

            spiechurski Sebastien Piechurski added a comment

            Hello Seb,

            And thank you for the update.

            Yes, you are right, and maybe I did not state it explicitly enough, but #6089 and #6200 are 2 separate fixes and are not intended to be used together.

            About the other possible window for the race: it seems to exist, but in any case it appears less important than the first one, which you trigger and which #6089 fixes. So can you give #6089 alone a try?

            BTW, how did you manage to integrate both patches, since #6200 removes any use/declaration of lov_oinfo/loi while #6089 uses it extensively?

            And again, many thanks for your help on this.

            bfaccini Bruno Faccini (Inactive) added a comment - edited

            Hi Bruno,

            I think the package we tried included both #6089 and #6200.
            Looking into it in more detail, and given that #6200 was rejected, I understand that they were not supposed to be applied together anyway, am I right?

            There is a comment on #6089 saying that there is another window for the same kind of race. Will there be an update to that patch to close this window?

            spiechurski Sebastien Piechurski added a comment

            Hello Alex,

            Sorry about that, but do you have more information that could help me understand what is going wrong? Is there a crash-dump available, or any other data on this dead-lock? Have you or the on-site people already done some troubleshooting?

            Also, which patch/change did you integrate, #6089 or #6200?

            bfaccini Bruno Faccini (Inactive) added a comment

            Quick feedback from the site: with the proposed patch the system deadlocks almost immediately.

            louveta Alexandre Louvet (Inactive) added a comment

            We might be able to reproduce the crashing workload on another machine which is currently being installed.
            I am currently on vacation, but will try that next week when I'm back.

            spiechurski Sebastien Piechurski added a comment

            Hi Bruno,

            The bug only occurs on the login node, and this node is unique. So the people on site would like to wait for the 2 patches to be inspected and approved before testing them.

            Thanks,
            Sebastien.

            sebastien.buisson Sebastien Buisson (Inactive) added a comment

            Seb(s), Benjamin,

            Would it be possible for you to install and run builds with these 2 patches (on a few nodes/clients where the problem has already occurred) and expose them to the site workload that is likely to reproduce the crash?

            BTW, is the Meteo site already running in production?

            bfaccini Bruno Faccini (Inactive) added a comment

            People

              Assignee: bfaccini Bruno Faccini (Inactive)
              Reporter: spiechurski Sebastien Piechurski
              Votes: 0
              Watchers: 11
