[LU-11529] lnetctl does not change NI health value from YAML configuration file Created: 17/Oct/18 Updated: 21/Jan/22 |
|
| Status: | Open |
| Project: | Lustre |
| Component/s: | None |
| Affects Version/s: | Lustre 2.12.0 |
| Fix Version/s: | None |
| Type: | Improvement | Priority: | Minor |
| Reporter: | Jian Yu | Assignee: | Amir Shehata (Inactive) |
| Resolution: | Unresolved | Votes: | 0 |
| Labels: | lnet-health | ||
| Environment: |
Lustre Build: https://build.whamcloud.com/job/lustre-master/3806/ |
||
| Severity: | 3 |
| Rank (Obsolete): | 9223372036854775807 |
| Description |
[root@trevis-402 ~]# lnetctl net show
net:
- net type: lo
local NI(s):
- nid: 0@lo
status: up
[root@trevis-402 ~]# cat lnet.yaml
net:
- net type: o2ib
local NI(s):
- nid: 192.168.1.2@o2ib
status: up
interfaces:
0: ib0
statistics:
send_count: 0
recv_count: 0
drop_count: 0
sent_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
received_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
dropped_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
health stats:
health value: 512 <------------ health value was set to 512 in the YAML file
interrupts: 0
dropped: 0
aborted: 0
no route: 0
timeouts: 0
error: 0
tunables:
peer_timeout: 180
peer_credits: 8
peer_buffer_credits: 0
credits: 256
peercredits_hiw: 4
map_on_demand: 1
concurrent_sends: 0
fmr_pool_size: 512
fmr_flush_trigger: 384
fmr_cache: 1
ntx: 512
conns_per_peer: 1
lnd tunables:
dev cpt: 0
tcp bonding: 0
CPT: "[0,1]"
global:
numa_range: 0
max_intf: 200
discovery: 1
[root@trevis-402 ~]# lnetctl import --add lnet.yaml
[root@trevis-402 ~]# lnetctl net show
net:
- net type: lo
local NI(s):
- nid: 0@lo
status: up
- net type: o2ib
local NI(s):
- nid: 192.168.1.2@o2ib
status: up
interfaces:
0: ib0
[root@trevis-402 ~]# lnetctl net show -v 3
net:
- net type: lo
local NI(s):
- nid: 0@lo
status: up
statistics:
send_count: 0
recv_count: 0
drop_count: 0
sent_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
received_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
dropped_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
health stats:
health value: 0
interrupts: 0
dropped: 0
aborted: 0
no route: 0
timeouts: 0
error: 0
tunables:
peer_timeout: 0
peer_credits: 0
peer_buffer_credits: 0
credits: 0
dev cpt: 0
tcp bonding: 0
CPT: "[0,1]"
- net type: o2ib
local NI(s):
- nid: 192.168.1.2@o2ib
status: up
interfaces:
0: ib0
statistics:
send_count: 0
recv_count: 0
drop_count: 0
sent_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
received_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
dropped_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
health stats:
health value: 1000 <------------ health value became default instead of 512 in the YAML file
interrupts: 0
dropped: 0
aborted: 0
no route: 0
timeouts: 0
error: 0
tunables:
peer_timeout: 180
peer_credits: 8
peer_buffer_credits: 0
credits: 256
peercredits_hiw: 4
map_on_demand: 1
concurrent_sends: 0
fmr_pool_size: 512
fmr_flush_trigger: 384
fmr_cache: 1
ntx: 512
conns_per_peer: 1
lnd tunables:
dev cpt: 0
tcp bonding: 0
CPT: "[0,1]"
|
| Comments |
| Comment by Jian Yu [ 17/Oct/18 ] |
|
The lnetctl command also does not set the value correctly:
# lnetctl net set --nid 192.168.1.2@o2ib --health 256
# lnetctl net show -v 3
net:
- net type: lo
local NI(s):
- nid: 0@lo
status: up
statistics:
send_count: 0
recv_count: 0
drop_count: 0
sent_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
received_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
dropped_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
health stats:
health value: 0
interrupts: 0
dropped: 0
aborted: 0
no route: 0
timeouts: 0
error: 0
tunables:
peer_timeout: 0
peer_credits: 0
peer_buffer_credits: 0
credits: 0
dev cpt: 0
tcp bonding: 0
CPT: "[0,1]"
- net type: o2ib
local NI(s):
- nid: 192.168.1.2@o2ib
status: up
interfaces:
0: ib0
statistics:
send_count: 70
recv_count: 70
drop_count: 0
sent_stats:
put: 0
get: 70
reply: 0
ack: 0
hello: 0
received_stats:
put: 0
get: 35
reply: 35
ack: 0
hello: 0
dropped_stats:
put: 0
get: 0
reply: 0
ack: 0
hello: 0
health stats:
health value: 361 <------------ health value became 361 instead of 256
interrupts: 0
dropped: 0
aborted: 0
no route: 0
timeouts: 0
error: 0
tunables:
peer_timeout: 180
peer_credits: 8
peer_buffer_credits: 0
credits: 256
peercredits_hiw: 4
map_on_demand: 1
concurrent_sends: 0
fmr_pool_size: 512
fmr_flush_trigger: 384
fmr_cache: 1
ntx: 512
conns_per_peer: 1
lnd tunables:
dev cpt: 0
tcp bonding: 0
CPT: "[0,1]"
|
| Comment by Amir Shehata (Inactive) [ 17/Oct/18 ] |
|
This is a debug command. Its purpose is to test that when the health value is set to something other than the maximum, the interface gets put on the recovery queue. That's why the value you see is not exactly the value you set. Once the interface is on the recovery queue, it gets pinged, and with every successful ping the health value is increased. If you wait for a bit, the health value should go back up to the maximum value. I didn't add a way to change that from YAML. I know the test case says that, but now that I think about it, since this is a debugging command, I don't think setting it from YAML is a necessity. I'm now thinking that it might be a good idea to move this under the debug subcommand instead of keeping it in net. So we might as well keep this ticket open for that purpose. |