Details
-
Bug
-
Resolution: Fixed
-
Minor
-
None
-
3
-
9223372036854775807
Description
Running "lctl set_param -P lod.*.mdt_hash=crush" or "lctl conf_param testfs.quota.mdt=ug3" hangs on an older system (el7.9 with 2.14.0 kernel modules):
Using TIMEOUT=20
Writer error: failed to resolve Netlink family id
Writer error: failed to resolve Netlink family id
osc.testfs-OST0000-osc-ffff95fc0529ae98.idle_timeout=debug
osc.testfs-OST0001-osc-ffff95fc0529ae98.idle_timeout=debug
osc.testfs-OST0002-osc-ffff95fc0529ae98.idle_timeout=debug
osc.testfs-OST0003-osc-ffff95fc0529ae98.idle_timeout=debug
disable quota as required
Writer error: failed to resolve Netlink family id
Running it under ltrace ("ltrace lctl set_param -P lod.*.mdt_hash=crush") shows that it loops forever in the YAML code:
__libc_start_main(0x4209b5, 4, 0x7ffdf2c34e38, 0x42af80 <unfinished ...>
setlinebuf(0x7fcc8c685400) = <void>
register_ioc_dev(0, 0x42b257, 0x7ffdf2c34e38, 0) = 0
register_ioc_dev(1, 0x42e0ce, 0x7ffdf2c34e38, 0) = 1
llapi_set_command_name(0x7ffdf2c36fa9, 0x7ffdf2c34e38, 0x7ffdf2c34e38, 0) = 0x7fcc8d88c5a0
cfs_parser(4, 0x7ffdf2c34e38, 0x63ea40, 0x7ffdf2c34e38 <unfinished ...>
memset(0x7ffdf2c34c70, '\0', 8) = 0x7ffdf2c34c70
getopt(3, 0x7ffdf2c34e40, "dFnPt::") = 80
getopt(3, 0x7ffdf2c34e40, "dFnPt::") = -1
memset(0x7ffdf2c34b00, '\0', 104) = 0x7ffdf2c34b00
strlen("general") = 7
strlen("lod.*.mdt_hash=crush") = 20
malloc(72) = 0x1dc5010
memcpy(0x1dc5038, "general\0", 8) = 0x1dc5038
memcpy(0x1dc5040, "lod.*.mdt_hash=crush\0", 21) = 0x1dc5040
memset(0x7ffdf2c34890, '\0', 576) = 0x7ffdf2c34890
strspn("$MGS", "0123456789") = 0
strlen("$MGS") = 4
nl_socket_alloc(0x7ffdf2c32841, 0x7ffdf2c32841, 0xffffffff, 0x7ffdf2c32841) = 0x1dc5150
yaml_parser_initialize(0x7ffdf2c30170, 0x1dc5190, 16, 9) = 1
yaml_parser_set_input_netlink(0x7ffdf2c30170, 0x1dc5150, 0, 0x1dc5150) = 1
yaml_emitter_initialize(0x7ffdf2c30350, 3, 0, 0x1dd5ca0) = 1
yaml_emitter_set_output_netlink(0x7ffdf2c30350, 0x1dc5150, 0x42f96f, 1) = 0
yaml_emitter_log_error(0x7ffdf2c30350, 0x7fcc8c6851c0, 0x7fcc8c6851c0, 0x1df2500Writer error: failed to resolve Netlink family id
) = 50
yaml_emitter_delete(0x7ffdf2c30350, 0, 0, 50) = 0
yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0
yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0
yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0
yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0
yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
:
[repeats forever]
:
Running "lctl get_param lod.*.mdt_hash" or "lctl set_param lod.*.mdt_hash=crush" works OK, so it appears to be something to do with setting permanent parameters using netlink:
# ltrace ./lustre/utils/.libs/lt-lctl set_param lod.*.mdt_hash=crush |& tee /tmp/lctl-ok.log
__libc_start_main(0x4209b5, 3, 0x7fffaf57bb78, 0x42af80 <unfinished ...>
setlinebuf(0x7fe438feb400) = <void>
register_ioc_dev(0, 0x42b257, 0x7fffaf57bb78, 0) = 0
register_ioc_dev(1, 0x42e0ce, 0x7fffaf57bb78, 0) = 1
llapi_set_command_name(0x7fffaf57bfac, 0x7fffaf57bb78, 0x7fffaf57bb78, 0) = 0x7fe43a1f25a0
cfs_parser(3, 0x7fffaf57bb78, 0x63ea40, 0x7fffaf57bb78 <unfinished ...>
memset(0x7fffaf57b9b0, '\0', 8) = 0x7fffaf57b9b0
getopt(2, 0x7fffaf57bb80, "dFnPt::") = -1
strchr("lod.*.mdt_hash=crush", '=') = "=crush"
strrchr("lod.*.mdt_hash", '\\') = nil
strchr("lod.*.mdt_hash", '@') = nil
strchr("lod.*.mdt_hash", '.') = ".*.mdt_hash"
strchr("*.mdt_hash", '.') = ".mdt_hash"
strchr("mdt_hash", '.') = nil
llapi_param_get_paths(0x7fffaf57bfb6, 0x7fffaf57b8a0, 0x7fffaf57b8a0, 2) = 0
calloc(4, 8) = 0x20d4090
__xstat64(1, "/sys/fs/lustre/lod/testfs-MDT000"..., 0x7fffaf579800) = 0
strstr("/sys/fs/lustre/lod/testfs-MDT000"..., "/lustre/") = "/lustre/lod/testfs-MDT0000-mdtlo"...
strdup("lod/testfs-MDT0000-mdtlov/mdt_ha"...) = 0x20d4300
strchr("lod/testfs-MDT0000-mdtlov/mdt_ha"..., '/') = "/testfs-MDT0000-mdtlov/mdt_hash"
strchr(".testfs-MDT0000-mdtlov/mdt_hash", '/') = "/mdt_hash"
strchr(".mdt_hash", '/') = nil
open64("/sys/fs/lustre/lod/testfs-MDT000"..., 1, 025725734660) = 3
strlen("crush") = 5
write(3, "crush", 5) = 5
strlen("crush") = 5
printf("%s=%s\n", "lod.testfs-MDT0000-mdtlov.mdt_ha"..., "crush"lod.testfs-MDT0000-mdtlov.mdt_hash=crush
) = 41
close(3) = 0
:
[repeats for other MDTs]
:
I thought there were fallbacks to the old behavior in case Netlink/YAML was not working?
Attachments
Issue Links
- is related to
-
LU-9680 Improve the user land to kernel space interface for lustre
-
- In Progress
-