Details
-
Bug
-
Resolution: Fixed
-
Minor
-
None
-
3
-
9223372036854775807
Description
Running "lctl set_param -P lod.*.mdt_hash=crush" or "lctl conf_param testfs.quota.mdt=ug3" hangs on an older system (el7.9 running 2.14.0 kernel modules):
Using TIMEOUT=20 Writer error: failed to resolve Netlink family id Writer error: failed to resolve Netlink family id osc.testfs-OST0000-osc-ffff95fc0529ae98.idle_timeout=debug osc.testfs-OST0001-osc-ffff95fc0529ae98.idle_timeout=debug osc.testfs-OST0002-osc-ffff95fc0529ae98.idle_timeout=debug osc.testfs-OST0003-osc-ffff95fc0529ae98.idle_timeout=debug disable quota as required Writer error: failed to resolve Netlink family id
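Running it under ltrace ("ltrace lctl set_param -P lod.*.mdt_hash=crush") shows that, after yaml_emitter_set_output_netlink() fails with "failed to resolve Netlink family id", lctl loops forever calling yaml_parser_parse() in the YAML code: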
__libc_start_main(0x4209b5, 4, 0x7ffdf2c34e38, 0x42af80 <unfinished ...> setlinebuf(0x7fcc8c685400) = <void> register_ioc_dev(0, 0x42b257, 0x7ffdf2c34e38, 0) = 0 register_ioc_dev(1, 0x42e0ce, 0x7ffdf2c34e38, 0) = 1 llapi_set_command_name(0x7ffdf2c36fa9, 0x7ffdf2c34e38, 0x7ffdf2c34e38, 0) = 0x7fcc8d88c5a0 cfs_parser(4, 0x7ffdf2c34e38, 0x63ea40, 0x7ffdf2c34e38 <unfinished ...> memset(0x7ffdf2c34c70, '\0', 8) = 0x7ffdf2c34c70 getopt(3, 0x7ffdf2c34e40, "dFnPt::") = 80 getopt(3, 0x7ffdf2c34e40, "dFnPt::") = -1 memset(0x7ffdf2c34b00, '\0', 104) = 0x7ffdf2c34b00 strlen("general") = 7 strlen("lod.*.mdt_hash=crush") = 20 malloc(72) = 0x1dc5010 memcpy(0x1dc5038, "general\0", 8) = 0x1dc5038 memcpy(0x1dc5040, "lod.*.mdt_hash=crush\0", 21) = 0x1dc5040 memset(0x7ffdf2c34890, '\0', 576) = 0x7ffdf2c34890 strspn("$MGS", "0123456789") = 0 strlen("$MGS") = 4 nl_socket_alloc(0x7ffdf2c32841, 0x7ffdf2c32841, 0xffffffff, 0x7ffdf2c32841) = 0x1dc5150 yaml_parser_initialize(0x7ffdf2c30170, 0x1dc5190, 16, 9) = 1 yaml_parser_set_input_netlink(0x7ffdf2c30170, 0x1dc5150, 0, 0x1dc5150) = 1 yaml_emitter_initialize(0x7ffdf2c30350, 3, 0, 0x1dd5ca0) = 1 yaml_emitter_set_output_netlink(0x7ffdf2c30350, 0x1dc5150, 0x42f96f, 1) = 0 yaml_emitter_log_error(0x7ffdf2c30350, 0x7fcc8c6851c0, 0x7fcc8c6851c0, 0x1df2500Writer error: failed to resolve Netlink family id ) = 50 yaml_emitter_delete(0x7ffdf2c30350, 0, 0, 50) = 0 yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1 yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0 yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1 yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0 yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1 yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0 yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1 yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0 yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 
0) = 1 : [repeats forever] :
Running "lctl get_param lod.*.mdt_hash" or "lctl set_param lod.*.mdt_hash=crush" works OK, so the problem appears to be specific to setting permanent parameters via Netlink:
# ltrace ./lustre/utils/.libs/lt-lctl set_param lod.*.mdt_hash=crush |& tee /tmp/lctl-ok.log __libc_start_main(0x4209b5, 3, 0x7fffaf57bb78, 0x42af80 <unfinished ...> setlinebuf(0x7fe438feb400) = <void> register_ioc_dev(0, 0x42b257, 0x7fffaf57bb78, 0) = 0 register_ioc_dev(1, 0x42e0ce, 0x7fffaf57bb78, 0) = 1 llapi_set_command_name(0x7fffaf57bfac, 0x7fffaf57bb78, 0x7fffaf57bb78, 0) = 0x7fe43a1f25a0 cfs_parser(3, 0x7fffaf57bb78, 0x63ea40, 0x7fffaf57bb78 <unfinished ...> memset(0x7fffaf57b9b0, '\0', 8) = 0x7fffaf57b9b0 getopt(2, 0x7fffaf57bb80, "dFnPt::") = -1 strchr("lod.*.mdt_hash=crush", '=') = "=crush" strrchr("lod.*.mdt_hash", '\\') = nil strchr("lod.*.mdt_hash", '@') = nil strchr("lod.*.mdt_hash", '.') = ".*.mdt_hash" strchr("*.mdt_hash", '.') = ".mdt_hash" strchr("mdt_hash", '.') = nil llapi_param_get_paths(0x7fffaf57bfb6, 0x7fffaf57b8a0, 0x7fffaf57b8a0, 2) = 0 calloc(4, 8) = 0x20d4090 __xstat64(1, "/sys/fs/lustre/lod/testfs-MDT000"..., 0x7fffaf579800) = 0 strstr("/sys/fs/lustre/lod/testfs-MDT000"..., "/lustre/") = "/lustre/lod/testfs-MDT0000-mdtlo"... strdup("lod/testfs-MDT0000-mdtlov/mdt_ha"...) = 0x20d4300 strchr("lod/testfs-MDT0000-mdtlov/mdt_ha"..., '/') = "/testfs-MDT0000-mdtlov/mdt_hash" strchr(".testfs-MDT0000-mdtlov/mdt_hash", '/') = "/mdt_hash" strchr(".mdt_hash", '/') = nil open64("/sys/fs/lustre/lod/testfs-MDT000"..., 1, 025725734660) = 3 strlen("crush") = 5 write(3, "crush", 5) = 5 strlen("crush") = 5 printf("%s=%s\n", "lod.testfs-MDT0000-mdtlov.mdt_ha"..., "crush"lod.testfs-MDT0000-mdtlov.mdt_hash=crush ) = 41 close(3) = 0 : [repeats for other MDTs] :
I thought there were fallbacks to the old behavior in case Netlink/YAML was not working?
Attachments
Issue Links
- is related to
-
LU-9680 Improve the user land to kernel space interface for lustre
- In Progress