Uploaded image for project: 'Lustre'
  1. Lustre
  2. LU-11529

lnetctl does not change NI health value from YAML configuration file

Details

    • 3
    • 9223372036854775807

    Description

      [root@trevis-402 ~]# lnetctl net show
      net:
          - net type: lo   
            local NI(s):   
              - nid: 0@lo  
                status: up 
      
      [root@trevis-402 ~]# cat lnet.yaml
      net:
          - net type: o2ib 
            local NI(s):   
              - nid: 192.168.1.2@o2ib
                status: up 
                interfaces:
                    0: ib0 
                statistics:
                    send_count: 0
                    recv_count: 0
                    drop_count: 0
                sent_stats:
                    put: 0 
                    get: 0 
                    reply: 0
                    ack: 0 
                    hello: 0
                received_stats:
                    put: 0 
                    get: 0 
                    reply: 0
                    ack: 0 
                    hello: 0
                dropped_stats:
                    put: 0 
                    get: 0 
                    reply: 0
                    ack: 0 
                    hello: 0
                health stats:
                    health value: 512      <------------ health value was set to 512 in the YAML file
                    interrupts: 0
                    dropped: 0
                    aborted: 0
                    no route: 0
                    timeouts: 0
                    error: 0
                tunables:  
                    peer_timeout: 180
                    peer_credits: 8
                    peer_buffer_credits: 0
                    credits: 256
                    peercredits_hiw: 4
                    map_on_demand: 1
                    concurrent_sends: 0
                    fmr_pool_size: 512
                    fmr_flush_trigger: 384
                    fmr_cache: 1
                    ntx: 512
                    conns_per_peer: 1
                lnd tunables:
                dev cpt: 0 
                tcp bonding: 0
                CPT: "[0,1]"
      global:
          numa_range: 0
          max_intf: 200
          discovery: 1
      
      [root@trevis-402 ~]# lnetctl import --add lnet.yaml
      [root@trevis-402 ~]# lnetctl net show
      net:
          - net type: lo   
            local NI(s):   
              - nid: 0@lo  
                status: up 
          - net type: o2ib 
            local NI(s):   
              - nid: 192.168.1.2@o2ib
                status: up 
                interfaces:
                    0: ib0 
      
      [root@trevis-402 ~]# lnetctl net show -v 3
      net:
          - net type: lo   
            local NI(s):   
              - nid: 0@lo  
                status: up 
                statistics:
                    send_count: 0
                    recv_count: 0
                    drop_count: 0
                sent_stats:
                    put: 0 
                    get: 0 
                    reply: 0
                    ack: 0 
                    hello: 0
                received_stats:
                    put: 0 
                    get: 0 
                    reply: 0
                    ack: 0 
                    hello: 0
                dropped_stats:
                    put: 0 
                    get: 0 
                    reply: 0
                    ack: 0 
                    hello: 0
                health stats:
                    health value: 0
                    interrupts: 0
                    dropped: 0
                    aborted: 0
                    no route: 0
                    timeouts: 0
                    error: 0
                tunables:  
                    peer_timeout: 0
                    peer_credits: 0
                    peer_buffer_credits: 0
                    credits: 0
                dev cpt: 0 
                tcp bonding: 0
                CPT: "[0,1]"
          - net type: o2ib 
            local NI(s):   
              - nid: 192.168.1.2@o2ib
                status: up 
                interfaces:
                    0: ib0 
                statistics:
                    send_count: 0
                    recv_count: 0
                    drop_count: 0
                sent_stats:
                    put: 0 
                    get: 0 
                    reply: 0
                    ack: 0 
                    hello: 0
                received_stats:
                    put: 0 
                    get: 0 
                    reply: 0
                    ack: 0 
                    hello: 0
                dropped_stats:
                    put: 0 
                    get: 0 
                    reply: 0
                    ack: 0 
                    hello: 0
                health stats:
                    health value: 1000      <------------ health value became the default (1000) instead of the 512 set in the YAML file
                    interrupts: 0
                    dropped: 0
                    aborted: 0
                    no route: 0
                    timeouts: 0
                    error: 0
                tunables:  
                    peer_timeout: 180
                    peer_credits: 8
                    peer_buffer_credits: 0
                    credits: 256
                    peercredits_hiw: 4
                    map_on_demand: 1
                    concurrent_sends: 0
                    fmr_pool_size: 512
                    fmr_flush_trigger: 384
                    fmr_cache: 1
                    ntx: 512
                    conns_per_peer: 1
                lnd tunables:
                dev cpt: 0 
                tcp bonding: 0
                CPT: "[0,1]"
      

      Attachments

        Issue Links

          Activity

            [LU-11529] lnetctl does not change NI health value from YAML configuration file

            This is a debug command. Its purpose is to test that when the health value is set to something other than the maximum, the interface gets put on the recovery queue. That's why the value you see is not exactly what you set it to. Once the interface is on the recovery queue, it gets pinged, and with every successful ping the health value is increased. If you wait for a bit, the health value should go back up to the maximum value.

            I didn't add a way to change that from YAML. I know the test case says that, but now that I think about it, since this is a debugging command, I don't think setting it from YAML is a necessity.

            I'm now thinking that it might be a good idea to move this under the debug subcommand instead of keeping it in net. So we might as well keep this ticket open for that purpose.

            ashehata Amir Shehata (Inactive) added a comment - This is a debug command. Its purpose is to test that when the health value is set to something other than the maximum, the interface gets put on the recovery queue. That's why the value you see is not exactly what you set it to. Once the interface is on the recovery queue, it gets pinged, and with every successful ping the health value is increased. If you wait for a bit, the health value should go back up to the maximum value. I didn't add a way to change that from YAML. I know the test case says that, but now that I think about it, since this is a debugging command, I don't think setting it from YAML is a necessity. I'm now thinking that it might be a good idea to move this under the debug subcommand instead of keeping it in net. So we might as well keep this ticket open for that purpose.
            yujian Jian Yu added a comment -

            lnetctl command also does not set the value correctly:

            # lnetctl net set --nid 192.168.1.2@o2ib --health 256
            
            # lnetctl net show -v 3
            net:
                - net type: lo
                  local NI(s):
                    - nid: 0@lo
                      status: up
                      statistics:
                          send_count: 0
                          recv_count: 0
                          drop_count: 0
                      sent_stats:
                          put: 0
                          get: 0
                          reply: 0
                          ack: 0
                          hello: 0
                      received_stats:
                          put: 0
                          get: 0
                          reply: 0
                          ack: 0
                          hello: 0
                      dropped_stats:
                          put: 0
                          get: 0
                          reply: 0
                          ack: 0
                          hello: 0
                      health stats:
                          health value: 0
                          interrupts: 0
                          dropped: 0
                          aborted: 0
                          no route: 0
                          timeouts: 0
                          error: 0
                      tunables:
                          peer_timeout: 0
                          peer_credits: 0
                          peer_buffer_credits: 0
                          credits: 0
                      dev cpt: 0
                      tcp bonding: 0
                      CPT: "[0,1]"
                - net type: o2ib
                  local NI(s):
                    - nid: 192.168.1.2@o2ib
                      status: up
                      interfaces:
                          0: ib0
                      statistics:
                          send_count: 70
                          recv_count: 70
                          drop_count: 0
                      sent_stats:
                          put: 0
                          get: 70
                          reply: 0
                          ack: 0
                          hello: 0
                      received_stats:
                          put: 0
                          get: 35
                          reply: 35
                          ack: 0
                          hello: 0
                      dropped_stats:
                          put: 0
                          get: 0
                          reply: 0
                          ack: 0
                          hello: 0
                      health stats:
                          health value: 361      <------------ health value became 361 instead of 256
                          interrupts: 0
                          dropped: 0
                          aborted: 0
                          no route: 0
                          timeouts: 0
                          error: 0
                      tunables:
                          peer_timeout: 180
                          peer_credits: 8
                          peer_buffer_credits: 0
                          credits: 256
                          peercredits_hiw: 4
                          map_on_demand: 1
                          concurrent_sends: 0
                          fmr_pool_size: 512
                          fmr_flush_trigger: 384
                          fmr_cache: 1
                          ntx: 512
                          conns_per_peer: 1
                      lnd tunables:
                      dev cpt: 0
                      tcp bonding: 0
                      CPT: "[0,1]"
            
            yujian Jian Yu added a comment - lnetctl command also does not set the value correctly: # lnetctl net set --nid 192.168.1.2@o2ib --health 256 # lnetctl net show -v 3 net: - net type: lo local NI(s): - nid: 0@lo status: up statistics: send_count: 0 recv_count: 0 drop_count: 0 sent_stats: put: 0 get: 0 reply: 0 ack: 0 hello: 0 received_stats: put: 0 get: 0 reply: 0 ack: 0 hello: 0 dropped_stats: put: 0 get: 0 reply: 0 ack: 0 hello: 0 health stats: health value: 0 interrupts: 0 dropped: 0 aborted: 0 no route: 0 timeouts: 0 error: 0 tunables: peer_timeout: 0 peer_credits: 0 peer_buffer_credits: 0 credits: 0 dev cpt: 0 tcp bonding: 0 CPT: "[0,1]" - net type: o2ib local NI(s): - nid: 192.168.1.2@o2ib status: up interfaces: 0: ib0 statistics: send_count: 70 recv_count: 70 drop_count: 0 sent_stats: put: 0 get: 70 reply: 0 ack: 0 hello: 0 received_stats: put: 0 get: 35 reply: 35 ack: 0 hello: 0 dropped_stats: put: 0 get: 0 reply: 0 ack: 0 hello: 0 health stats: health value: 361 <------------ health value became 361 instead of 256 interrupts: 0 dropped: 0 aborted: 0 no route: 0 timeouts: 0 error: 0 tunables: peer_timeout: 180 peer_credits: 8 peer_buffer_credits: 0 credits: 256 peercredits_hiw: 4 map_on_demand: 1 concurrent_sends: 0 fmr_pool_size: 512 fmr_flush_trigger: 384 fmr_cache: 1 ntx: 512 conns_per_peer: 1 lnd tunables: dev cpt: 0 tcp bonding: 0 CPT: "[0,1]"

            People

              ashehata Amir Shehata (Inactive)
              yujian Jian Yu
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated: