[LU-13491] lfs changelog command not working on 2.12.4 client to 2.10.7 server Created: 29/Apr/20  Updated: 11/Jun/20

Status: Open
Project: Lustre
Component/s: None
Affects Version/s: Lustre 2.10.7, Lustre 2.12.4
Fix Version/s: None

Type: Question/Request Priority: Minor
Reporter: Luis Silva Assignee: WC Triage
Resolution: Unresolved Votes: 0
Labels: None
Environment:

CentOS 7 (3.10.0-957.12.2.el7.x86_64), Dell PowerEdge R730xd


Rank (Obsolete): 9223372036854775807

 Description   

So I have a question about running the lfs changelog command. We've registered the changelog user on the MDT and are attempting to run the lfs changelog command against a few records to test. 

On MDS:

[root@holylfs02mds02 ~]# lctl get_param mdd.holylfs2-MDT0000.changelog_users
mdd.holylfs2-MDT0000.changelog_users=
current index: 16122510026
ID index
cl3 16122503691

 

On Client: 

/usr/bin/lfs changelog holylfs2-MDT0000 16122503691 16122503699

hangs...

 

There is no dmesg output at all on either the client or the MDS, but when I strace the command on the client I get this:

  # strace /usr/bin/lfs changelog holylfs2-MDT0000 16122503691 16122503699
    execve("/usr/bin/lfs", ["/usr/bin/lfs", "changelog", "holylfs2-MDT0000", "16122503691", "16122503699"], [/* 32 vars */]) = 0
    brk(NULL) = 0xeef000
    mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7ef1e62000
    access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
    open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
    fstat(3, {st_mode=S_IFREG|0644, st_size=67378, ...}) = 0
    mmap(NULL, 67378, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f7ef1e51000
    close(3) = 0
    open("/lib64/liblustreapi.so.1", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 s\0\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=159104, ...}) = 0
    mmap(NULL, 2288240, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef1a13000
    mprotect(0x7f7ef1a38000, 2093056, PROT_NONE) = 0
    mmap(0x7f7ef1c37000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x24000) = 0x7f7ef1c37000
    mmap(0x7f7ef1c39000, 35440, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f7ef1c39000
    close(3) = 0
    open("/lib64/libz.so.1", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20!\0\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=90248, ...}) = 0
    mmap(NULL, 2183272, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef17fd000
    mprotect(0x7f7ef1812000, 2093056, PROT_NONE) = 0
    mmap(0x7f7ef1a11000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x7f7ef1a11000
    close(3) = 0
    mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7ef1e50000
    open("/lib64/liblnetconfig.so.4", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300X\0\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=109600, ...}) = 0
    mmap(NULL, 2235136, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef15db000
    mprotect(0x7f7ef15f4000, 2093056, PROT_NONE) = 0
    mmap(0x7f7ef17f3000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x18000) = 0x7f7ef17f3000
    mmap(0x7f7ef17f5000, 31488, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f7ef17f5000
    close(3) = 0
    open("/lib64/libyaml-0.so.2", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200\32\0\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=131096, ...}) = 0
    mmap(NULL, 2224456, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef13bb000
    mprotect(0x7f7ef13da000, 2093056, PROT_NONE) = 0
    mmap(0x7f7ef15d9000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e000) = 0x7f7ef15d9000
    close(3) = 0
    open("/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0PS\0\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=1136944, ...}) = 0
    mmap(NULL, 3150136, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef10b9000
    mprotect(0x7f7ef11ba000, 2093056, PROT_NONE) = 0
    mmap(0x7f7ef13b9000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x100000) = 0x7f7ef13b9000
    close(3) = 0
    open("/lib64/libreadline.so.6", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0PO\1\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=285240, ...}) = 0
    mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7ef1e4f000
    mmap(NULL, 2380744, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef0e73000
    mprotect(0x7f7ef0eaf000, 2097152, PROT_NONE) = 0
    mmap(0x7f7ef10af000, 32768, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3c000) = 0x7f7ef10af000
    mmap(0x7f7ef10b7000, 5064, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f7ef10b7000
    close(3) = 0
    open("/lib64/libkeyutils.so.1", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\260\25\0\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=15688, ...}) = 0
    mmap(NULL, 2109720, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef0c6f000
    mprotect(0x7f7ef0c72000, 2093056, PROT_NONE) = 0
    mmap(0x7f7ef0e71000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f7ef0e71000
    close(3) = 0
    open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`&\2\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=2156240, ...}) = 0
    mmap(NULL, 3985920, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef08a1000
    mprotect(0x7f7ef0a64000, 2097152, PROT_NONE) = 0
    mmap(0x7f7ef0c64000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c3000) = 0x7f7ef0c64000
    mmap(0x7f7ef0c6a000, 16896, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f7ef0c6a000
    close(3) = 0
    open("/lib64/libtinfo.so.5", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\316\0\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=174576, ...}) = 0
    mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7ef1e4e000
    mmap(NULL, 2268928, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef0677000
    mprotect(0x7f7ef069c000, 2097152, PROT_NONE) = 0
    mmap(0x7f7ef089c000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x25000) = 0x7f7ef089c000
    close(3) = 0
    open("/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\16\0\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=19248, ...}) = 0
    mmap(NULL, 2109744, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7ef0473000
    mprotect(0x7f7ef0475000, 2097152, PROT_NONE) = 0
    mmap(0x7f7ef0675000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f7ef0675000
    close(3) = 0
    mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7ef1e4d000
    mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7ef1e4b000
    arch_prctl(ARCH_SET_FS, 0x7f7ef1e4b740) = 0
    mprotect(0x7f7ef0c64000, 16384, PROT_READ) = 0
    mprotect(0x7f7ef0675000, 4096, PROT_READ) = 0
    mprotect(0x7f7ef089c000, 16384, PROT_READ) = 0
    mprotect(0x7f7ef0e71000, 4096, PROT_READ) = 0
    mprotect(0x7f7ef10af000, 8192, PROT_READ) = 0
    mprotect(0x7f7ef13b9000, 4096, PROT_READ) = 0
    mprotect(0x7f7ef15d9000, 4096, PROT_READ) = 0
    mprotect(0x7f7ef17f3000, 4096, PROT_READ) = 0
    mprotect(0x7f7ef1a11000, 4096, PROT_READ) = 0
    mprotect(0x7f7ef1c37000, 4096, PROT_READ) = 0
    mprotect(0x625000, 4096, PROT_READ) = 0
    mprotect(0x7f7ef1e63000, 4096, PROT_READ) = 0
    munmap(0x7f7ef1e51000, 67378) = 0
    gettid() = 32258
    open("/dev/urandom", O_RDONLY|O_NOFOLLOW) = 3
    read(3, "\27\344\247+", 4) = 4
    close(3) = 0
    brk(NULL) = 0xeef000
    brk(0xf10000) = 0xf10000
    brk(NULL) = 0xf10000
    open("/dev/changelog-holylfs2-MDT0000", O_RDONLY) = 3
    lseek(3, 16122503691, SEEK_SET) = 16122503691
    read(3

 

Any clue as to why it's hanging while trying to read /dev/changelog-holylfs2-MDT0000? The device does seem to exist when I run this. This connection is over IB and is not running over LNet routes. I'm running 2.12.4 on the client and the server is 2.10.7. Any help would be much appreciated. 

 



 Comments   
Comment by Luis Silva [ 11/Jun/20 ]

We were able to address the issue by following the extreme changelog user clear-out method outlined here: https://jira.whamcloud.com/browse/LU-11980?focusedCommentId=244872&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-244872

Generated at Sat Feb 10 03:01:43 UTC 2024 using Jira 9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c.