Uploaded image for project: 'Lustre'
  1. Lustre
  2. LU-17687

'lctl set_param/conf_param' hung running on older system

    XMLWordPrintable

Details

    • Bug
    • Resolution: Fixed
    • Minor
    • Lustre 2.16.0
    • None
    • 3
    • 9223372036854775807

    Description

      Running "lctl set_param -P lod.*.mdt_hash=crush" or "lctl conf_param testfs.quota.mdt=ug3" hangs when run on an older system (el7.9 with 2.14.0 kernel modules):

      Using TIMEOUT=20
      Writer error: failed to resolve Netlink family id
      Writer error: failed to resolve Netlink family id
      osc.testfs-OST0000-osc-ffff95fc0529ae98.idle_timeout=debug
      osc.testfs-OST0001-osc-ffff95fc0529ae98.idle_timeout=debug
      osc.testfs-OST0002-osc-ffff95fc0529ae98.idle_timeout=debug
      osc.testfs-OST0003-osc-ffff95fc0529ae98.idle_timeout=debug
      disable quota as required
      Writer error: failed to resolve Netlink family id
      

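      Running it under ltrace ("ltrace lctl set_param -P lod.*.mdt_hash=crush"), it looks like it is looping forever in the YAML code: after yaml_emitter_set_output_netlink() returns 0 and the emitter is deleted, yaml_parser_parse() is called over and over: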

      __libc_start_main(0x4209b5, 4, 0x7ffdf2c34e38, 0x42af80 <unfinished ...>
      setlinebuf(0x7fcc8c685400)                       = <void>
      register_ioc_dev(0, 0x42b257, 0x7ffdf2c34e38, 0) = 0
      register_ioc_dev(1, 0x42e0ce, 0x7ffdf2c34e38, 0) = 1
      llapi_set_command_name(0x7ffdf2c36fa9, 0x7ffdf2c34e38, 0x7ffdf2c34e38, 0) = 0x7fcc8d88c5a0
      cfs_parser(4, 0x7ffdf2c34e38, 0x63ea40, 0x7ffdf2c34e38 <unfinished ...>
      memset(0x7ffdf2c34c70, '\0', 8)                  = 0x7ffdf2c34c70
      getopt(3, 0x7ffdf2c34e40, "dFnPt::")             = 80
      getopt(3, 0x7ffdf2c34e40, "dFnPt::")             = -1
      memset(0x7ffdf2c34b00, '\0', 104)                = 0x7ffdf2c34b00
      strlen("general")                                = 7
      strlen("lod.*.mdt_hash=crush")                   = 20
      malloc(72)                                       = 0x1dc5010
      memcpy(0x1dc5038, "general\0", 8)                = 0x1dc5038
      memcpy(0x1dc5040, "lod.*.mdt_hash=crush\0", 21)  = 0x1dc5040
      memset(0x7ffdf2c34890, '\0', 576)                = 0x7ffdf2c34890
      strspn("$MGS", "0123456789")                     = 0
      strlen("$MGS")                                   = 4
      nl_socket_alloc(0x7ffdf2c32841, 0x7ffdf2c32841, 0xffffffff, 0x7ffdf2c32841) = 0x1dc5150
      yaml_parser_initialize(0x7ffdf2c30170, 0x1dc5190, 16, 9) = 1
      yaml_parser_set_input_netlink(0x7ffdf2c30170, 0x1dc5150, 0, 0x1dc5150) = 1
      yaml_emitter_initialize(0x7ffdf2c30350, 3, 0, 0x1dd5ca0) = 1
      yaml_emitter_set_output_netlink(0x7ffdf2c30350, 0x1dc5150, 0x42f96f, 1) = 0
      yaml_emitter_log_error(0x7ffdf2c30350, 0x7fcc8c6851c0, 0x7fcc8c6851c0, 0x1df2500Writer error: failed to resolve Netlink family id
      ) = 50
      yaml_emitter_delete(0x7ffdf2c30350, 0, 0, 50)    = 0
      yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
      yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0
      yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
      yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0
      yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
      yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0
      yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
      yaml_event_delete(0x7ffdf2c30100, 0x7ffdf2c30100, 0, 0) = 0
      yaml_parser_parse(0x7ffdf2c30170, 0x7ffdf2c30100, 0x7ffdf2c30100, 0) = 1
      :
      [repeats forever]
      :
      

      Running "lctl get_param lod.*.mdt_hash" or "lctl set_param lod.*.mdt_hash=crush" works OK, so it appears to be something to do with setting permanent parameters using netlink:

      # ltrace ./lustre/utils/.libs/lt-lctl set_param lod.*.mdt_hash=crush |& tee /tmp/lctl-ok.log
      __libc_start_main(0x4209b5, 3, 0x7fffaf57bb78, 0x42af80 <unfinished ...>
      setlinebuf(0x7fe438feb400)                       = <void>
      register_ioc_dev(0, 0x42b257, 0x7fffaf57bb78, 0) = 0
      register_ioc_dev(1, 0x42e0ce, 0x7fffaf57bb78, 0) = 1
      llapi_set_command_name(0x7fffaf57bfac, 0x7fffaf57bb78, 0x7fffaf57bb78, 0) = 0x7fe43a1f25a0
      cfs_parser(3, 0x7fffaf57bb78, 0x63ea40, 0x7fffaf57bb78 <unfinished ...>
      memset(0x7fffaf57b9b0, '\0', 8)                  = 0x7fffaf57b9b0
      getopt(2, 0x7fffaf57bb80, "dFnPt::")             = -1
      strchr("lod.*.mdt_hash=crush", '=')              = "=crush"
      strrchr("lod.*.mdt_hash", '\\')                  = nil
      strchr("lod.*.mdt_hash", '@')                    = nil
      strchr("lod.*.mdt_hash", '.')                    = ".*.mdt_hash"
      strchr("*.mdt_hash", '.')                        = ".mdt_hash"
      strchr("mdt_hash", '.')                          = nil
      llapi_param_get_paths(0x7fffaf57bfb6, 0x7fffaf57b8a0, 0x7fffaf57b8a0, 2) = 0
      calloc(4, 8)                                     = 0x20d4090
      __xstat64(1, "/sys/fs/lustre/lod/testfs-MDT000"..., 0x7fffaf579800) = 0
      strstr("/sys/fs/lustre/lod/testfs-MDT000"..., "/lustre/") = "/lustre/lod/testfs-MDT0000-mdtlo"...
      strdup("lod/testfs-MDT0000-mdtlov/mdt_ha"...)    = 0x20d4300
      strchr("lod/testfs-MDT0000-mdtlov/mdt_ha"..., '/') = "/testfs-MDT0000-mdtlov/mdt_hash"
      strchr(".testfs-MDT0000-mdtlov/mdt_hash", '/')   = "/mdt_hash"
      strchr(".mdt_hash", '/')                         = nil
      open64("/sys/fs/lustre/lod/testfs-MDT000"..., 1, 025725734660) = 3
      strlen("crush")                                  = 5
      write(3, "crush", 5)                             = 5
      strlen("crush")                                  = 5
      printf("%s=%s\n", "lod.testfs-MDT0000-mdtlov.mdt_ha"..., "crush"lod.testfs-MDT0000-mdtlov.mdt_hash=crush
      ) = 41
      close(3)                                         = 0
      :
      [repeats for other MDTs]
      :
      

      I thought there were fallbacks to the old behavior in case the Netlink/YAML interface was not working?

      Attachments

        Issue Links

          Activity

            People

              simmonsja James A Simmons
              adilger Andreas Dilger
              Votes:
              0 Vote for this issue
              Watchers:
              5 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: