Details

    • Bug
    • Resolution: Fixed
    • Major
    • Lustre 2.13.0, Lustre 2.12.4
    • Lustre 2.13.0, Lustre 2.12.3
    • None
    • 3
    • 9223372036854775807

    Description

      Hit on the master branch.

      Steps to reproduce:

      sles15build01:/home/hornc/lustre-filesystem # insmod ./libcfs/libcfs/libcfs.ko
      sles15build01:/home/hornc/lustre-filesystem # insmod ./lnet/lnet/lnet.ko
      sles15build01:/home/hornc/lustre-filesystem # cd lnet/utils/
      sles15build01:/home/hornc/lustre-filesystem/lnet/utils # ./lnetctl lnet configure
      sles15build01:/home/hornc/lustre-filesystem/lnet/utils # ./lnetctl export
      net:
          - net type: lo
            local NI(s):
              - nid: 0@lo
                status: up
                statistics:
                    send_count: 0
                    recv_count: 0
                    drop_count: 0
                sent_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                received_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                dropped_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                health stats:
                    health value: 0
                    interrupts: 0
                    dropped: 0
                    aborted: 0
                    no route: 0
                    timeouts: 0
                    error: 0
                tunables:
                    peer_timeout: 0
                    peer_credits: 0
                    peer_buffer_credits: 0
                    credits: 0
                dev cpt: 0
                tcp bonding: 0
                CPT: "[0,1,2,3]"
      global:
          numa_range: 0
          max_intf: 200
          discovery: 1
          drop_asym_route: 0
      sles15build01:/home/hornc/lustre-filesystem/lnet/utils # ./lnetctl route add --net gni4 --gateway 10.12.0.[1-4]@o2ib40
      sles15build01:/home/hornc/lustre-filesystem/lnet/utils # ./lnetctl export
      net:
          - net type: lo
            local NI(s):
              - nid: 0@lo
                status: up
                statistics:
                    send_count: 0
                    recv_count: 0
                    drop_count: 0
                sent_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                received_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                dropped_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                health stats:
                    health value: 0
                    interrupts: 0
                    dropped: 0
                    aborted: 0
                    no route: 0
                    timeouts: 0
                    error: 0
                tunables:
                    peer_timeout: 0
                    peer_credits: 0
                    peer_buffer_credits: 0
                    credits: 0
                dev cpt: 0
                tcp bonding: 0
                CPT: "[0,1,2,3]"
      route:
          - net: gni4
            gateway: 10.12.0.4@o2ib40
            hop: -1
            priority: 0
            health_sensitivity: 1
            state: down
          - net: gni4
            gateway: 10.12.0.3@o2ib40
            hop: -1
            priority: 0
            health_sensitivity: 1
            state: down
          - net: gni4
            gateway: 10.12.0.2@o2ib40
            hop: -1
            priority: 0
            health_sensitivity: 1
            state: down
          - net: gni4
            gateway: 10.12.0.1@o2ib40
            hop: -1
            priority: 0
            health_sensitivity: 1
            state: down
      peer:
          - primary nid: 10.12.0.1@o2ib40
            Multi-Rail: True
            peer ni:
              - nid: 10.12.0.1@o2ib40
                state: up
                max_ni_tx_credits: 0
                available_tx_credits: 0
                min_tx_credits: 0
                tx_q_num_of_buf: 0
                available_rtr_credits: 0
                min_rtr_credits: 0
                refcount: 2
                statistics:
                    send_count: 0
                    recv_count: 0
                    drop_count: 0
                sent_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                received_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                dropped_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                health stats:
                    health value: 1000
                    dropped: 0
                    timeout: 0
                    error: 0
                    network timeout: 0
          - primary nid: 10.12.0.2@o2ib40
            Multi-Rail: True
            peer ni:
              - nid: 10.12.0.2@o2ib40
                state: up
                max_ni_tx_credits: 0
                available_tx_credits: 0
                min_tx_credits: 0
                tx_q_num_of_buf: 0
                available_rtr_credits: 0
                min_rtr_credits: 0
                refcount: 2
                statistics:
                    send_count: 0
                    recv_count: 0
                    drop_count: 0
                sent_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                received_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                dropped_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                health stats:
                    health value: 1000
                    dropped: 0
                    timeout: 0
                    error: 0
                    network timeout: 0
          - primary nid: 10.12.0.4@o2ib40
            Multi-Rail: True
            peer ni:
              - nid: 10.12.0.4@o2ib40
                state: up
                max_ni_tx_credits: 0
                available_tx_credits: 0
                min_tx_credits: 0
                tx_q_num_of_buf: 0
                available_rtr_credits: 0
                min_rtr_credits: 0
                refcount: 2
                statistics:
                    send_count: 0
                    recv_count: 0
                    drop_count: 0
                sent_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                received_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                dropped_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                health stats:
                    health value: 1000
                    dropped: 0
                    timeout: 0
                    error: 0
                    network timeout: 0
          - primary nid: 10.12.0.3@o2ib40
            Multi-Rail: True
            peer ni:
              - nid: 10.12.0.3@o2ib40
                state: up
                max_ni_tx_credits: 0
                available_tx_credits: 0
                min_tx_credits: 0
                tx_q_num_of_buf: 0
                available_rtr_credits: 0
                min_rtr_credits: 0
                refcount: 2
                statistics:
                    send_count: 0
                    recv_count: 0
                    drop_count: 0
                sent_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                received_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                dropped_stats:
                    put: 0
                    get: 0
                    reply: 0
                    ack: 0
                    hello: 0
                health stats:
                    health value: 1000
                    dropped: 0
                    timeout: 0
                    error: 0
                    network timeout: 0
      global:
          numa_range: 0
          max_intf: 200
          discovery: 1
          drop_asym_route: 0
      sles15build01:/home/hornc/lustre-filesystem/lnet/utils # ./lnetctl route del --gateway 10.12.0.[1-4]@o2ib4 --net gni4
      ^^^^ Command hangs
      

      Attachments

        1. dmesg.txt
          253 kB
          Chris Horn
        2. LU-12411.dump.tar.bz2
          103.24 MB
          Chris Horn

        Issue Links

          Activity

            [LU-12411] Hang on lnetctl route del

            Oleg Drokin (green@whamcloud.com) merged in patch https://review.whamcloud.com/36870/
            Subject: LU-12411 lnet: Do not allow gateways on remote nets
            Project: fs/lustre-release
            Branch: b2_12
            Current Patch Set:
            Commit: c6c9084c959ac972af557da100f251eccc79d2f7

            gerrit Gerrit Updater added a comment - Oleg Drokin (green@whamcloud.com) merged in patch https://review.whamcloud.com/36870/ Subject: LU-12411 lnet: Do not allow gateways on remote nets Project: fs/lustre-release Branch: b2_12 Current Patch Set: Commit: c6c9084c959ac972af557da100f251eccc79d2f7

            Minh Diep (mdiep@whamcloud.com) uploaded a new patch: https://review.whamcloud.com/36870
            Subject: LU-12411 lnet: Do not allow gateways on remote nets
            Project: fs/lustre-release
            Branch: b2_12
            Current Patch Set: 1
            Commit: cb829cc7683e66ecb95db7a80e925d716b6560e9

            gerrit Gerrit Updater added a comment - Minh Diep (mdiep@whamcloud.com) uploaded a new patch: https://review.whamcloud.com/36870 Subject: LU-12411 lnet: Do not allow gateways on remote nets Project: fs/lustre-release Branch: b2_12 Current Patch Set: 1 Commit: cb829cc7683e66ecb95db7a80e925d716b6560e9
            pjones Peter Jones added a comment -

            ok thanks. I've flagged the second one for the LTS so we'll pick it up when that lands to master.

            pjones Peter Jones added a comment - ok thanks. I've flagged the second one for the LTS so we'll pick it up when that lands to master.
            hornc Chris Horn added a comment -

            pjones FYI, I discovered that this is a regression that was originally introduced in 2.10.0 by the following commit:

            commit 376633ab5c487a2e9497e118ce351c4b1597bf33
            Author: Amir Shehata <amir.shehata@intel.com>
            Date:   Mon Jul 4 14:51:06 2016 -0700
             
                LU-7734 lnet: Routing fixes part 1
            

            Unfortunately, my fix that landed under this ticket contained a flaw. I pushed a patch for that under LU-12595. Assuming LU-12595 lands, you might consider landing both of these patches to LTS.

            hornc Chris Horn added a comment - pjones FYI, I discovered that this is a regression that was originally introduced in 2.10.0 commit 376633ab5c487a2e9497e118ce351c4b1597bf33 Author: Amir Shehata <amir.shehata@intel.com> Date: Mon Jul 4 14:51:06 2016 -0700 LU-7734 lnet: Routing fixes part 1 Unfortunately, my fix that landed under this ticket contained a flaw. I pushed a patch for that under LU-12595 . Assuming LU-12595 lands, you might consider landing both of these patches to LTS.
            hornc Chris Horn added a comment -

            Amir, is there another bug here to chase, or is it sufficient to just prevent the behavior that leads to breakage?

            hornc Chris Horn added a comment - Amir, is there another bug here to chase, or is it sufficient to just prevent the behavior that leads to breakage?
            pjones Peter Jones added a comment -

            Landed for 2.13

            pjones Peter Jones added a comment - Landed for 2.13

            Oleg Drokin (green@whamcloud.com) merged in patch https://review.whamcloud.com/35198/
            Subject: LU-12411 lnet: Do not allow gateways on remote nets
            Project: fs/lustre-release
            Branch: master
            Current Patch Set:
            Commit: 43b35351e9ca258773e89c2d68047e939fb822fb

            gerrit Gerrit Updater added a comment - Oleg Drokin (green@whamcloud.com) merged in patch https://review.whamcloud.com/35198/ Subject: LU-12411 lnet: Do not allow gateways on remote nets Project: fs/lustre-release Branch: master Current Patch Set: Commit: 43b35351e9ca258773e89c2d68047e939fb822fb

            Steps to reproduce

            [root@lustre01 ~]# modprobe lnet
            [root@lustre01 ~]# lnetctl lnet configure
            [root@lustre01 ~]# lnetctl route add --net tcp1 --gateway 192.168.122.[106-107]@tcp
            [root@lustre01 ~]# lnetctl route del --net tcp1 --gateway 192.168.122.[106-107]@tcp
            
            ashehata Amir Shehata (Inactive) added a comment - Steps to reproduce [root@lustre01 ~]# modprobe lnet [root@lustre01 ~]# lnetctl lnet configure [root@lustre01 ~]# lnetctl route add --net tcp1 --gateway 192.168.122.[106-107]@tcp [root@lustre01 ~]# lnetctl route del --net tcp1 --gateway 192.168.122.[106-107]@tcp

            Chris Horn (hornc@cray.com) uploaded a new patch: https://review.whamcloud.com/35198
            Subject: LU-12411 lnet: Do not allow gateways on remote nets
            Project: fs/lustre-release
            Branch: master
            Current Patch Set: 1
            Commit: 203acca356f3793c1f1c469af1b83790c814c9ad

            gerrit Gerrit Updater added a comment - Chris Horn (hornc@cray.com) uploaded a new patch: https://review.whamcloud.com/35198 Subject: LU-12411 lnet: Do not allow gateways on remote nets Project: fs/lustre-release Branch: master Current Patch Set: 1 Commit: 203acca356f3793c1f1c469af1b83790c814c9ad
            hornc Chris Horn added a comment -

            This might also be a bug exposed when adding any route where the gateway is on an unreachable lnet. We probably shouldn't allow that to happen. I will push a patch to prevent this.

            hornc Chris Horn added a comment - This might also be a bug exposed when adding any route where the gateway is on an unreachable lnet. We probably shouldn't allow that to happen. I will push a patch to prevent this.

            People

              ashehata Amir Shehata (Inactive)
              hornc Chris Horn
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: