[LU-10822] sanity test 27b fails with “*** buffer overflow detected ***: /usr/bin/lfs terminated” Created: 16/Mar/18 Updated: 30/Mar/18 Resolved: 30/Mar/18 |
|
| Status: | Resolved |
| Project: | Lustre |
| Component/s: | None |
| Affects Version/s: | Lustre 2.11.0 |
| Fix Version/s: | Lustre 2.11.0 |
| Type: | Bug | Priority: | Major |
| Reporter: | James Nunez (Inactive) | Assignee: | James A Simmons |
| Resolution: | Fixed | Votes: | 0 |
| Labels: | ubuntu | ||
| Issue Links: |
|
||||
| Severity: | 3 | ||||
| Rank (Obsolete): | 9223372036854775807 | ||||
| Description |
|
sanity test_27b fails with error /usr/lib64/lustre/tests/sanity.sh: line 1434: [: -eq: unary operator expected sanity test_27b: @@@@@@ FAIL: two-stripe file doesn't have two stripes Looking at the client test_log, we see what the real issue is == sanity test 27b: create and write to two stripe file ============================================== 09:49:29 (1521020969) *** buffer overflow detected ***: /usr/bin/lfs terminated ======= Backtrace: ========= /lib/x86_64-linux-gnu/libc.so.6(+0x777e5)[0x7f656102c7e5] /lib/x86_64-linux-gnu/libc.so.6(__fortify_fail+0x5c)[0x7f65610ce15c] /lib/x86_64-linux-gnu/libc.so.6(+0x117160)[0x7f65610cc160] /lib/x86_64-linux-gnu/libc.so.6(+0x1168dd)[0x7f65610cb8dd] /lib/x86_64-linux-gnu/libc.so.6(__snprintf_chk+0x78)[0x7f65610cb7f8] /usr/lib/liblustreapi.so.1(+0x10065)[0x7f65617ca065] /usr/lib/liblustreapi.so.1(+0x78ec)[0x7f65617c18ec] /usr/lib/liblustreapi.so.1(+0x9f64)[0x7f65617c3f64] /usr/bin/lfs[0x413e87] /usr/lib/liblustreapi.so.1(Parser_execarg+0x51)[0x7f65617d5381] /usr/bin/lfs[0x4042cc] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f6560fd5830] /usr/bin/lfs[0x404349] ======= Memory map: ======== 00400000-00421000 r-xp 00000000 fd:01 1451341 /usr/bin/lfs 00621000-00622000 r--p 00021000 fd:01 1451341 /usr/bin/lfs 00622000-00623000 rw-p 00022000 fd:01 1451341 /usr/bin/lfs 009b6000-009d7000 rw-p 00000000 00:00 0 [heap] 7f6560408000-7f656041e000 r-xp 00000000 fd:01 5767689 /lib/x86_64-linux-gnu/libgcc_s.so.1 7f656041e000-7f656061d000 ---p 00016000 fd:01 5767689 /lib/x86_64-linux-gnu/libgcc_s.so.1 7f656061d000-7f656061e000 rw-p 00015000 fd:01 5767689 /lib/x86_64-linux-gnu/libgcc_s.so.1 7f656061e000-7f6560643000 r-xp 00000000 fd:01 5767774 /lib/x86_64-linux-gnu/libtinfo.so.5.9 7f6560643000-7f6560842000 ---p 00025000 fd:01 5767774 /lib/x86_64-linux-gnu/libtinfo.so.5.9 7f6560842000-7f6560846000 r--p 00024000 fd:01 5767774 /lib/x86_64-linux-gnu/libtinfo.so.5.9 7f6560846000-7f6560847000 rw-p 00028000 fd:01 5767774 
/lib/x86_64-linux-gnu/libtinfo.so.5.9 7f6560847000-7f656094f000 r-xp 00000000 fd:01 5767444 /lib/x86_64-linux-gnu/libm-2.23.so 7f656094f000-7f6560b4e000 ---p 00108000 fd:01 5767444 /lib/x86_64-linux-gnu/libm-2.23.so 7f6560b4e000-7f6560b4f000 r--p 00107000 fd:01 5767444 /lib/x86_64-linux-gnu/libm-2.23.so 7f6560b4f000-7f6560b50000 rw-p 00108000 fd:01 5767444 /lib/x86_64-linux-gnu/libm-2.23.so 7f6560b50000-7f6560b6d000 r-xp 00000000 fd:01 1453147 /usr/lib/x86_64-linux-gnu/libyaml-0.so.2.0.4 7f6560b6d000-7f6560d6d000 ---p 0001d000 fd:01 1453147 /usr/lib/x86_64-linux-gnu/libyaml-0.so.2.0.4 7f6560d6d000-7f6560d6e000 r--p 0001d000 fd:01 1453147 /usr/lib/x86_64-linux-gnu/libyaml-0.so.2.0.4 7f6560d6e000-7f6560d6f000 rw-p 0001e000 fd:01 1453147 /usr/lib/x86_64-linux-gnu/libyaml-0.so.2.0.4 7f6560d6f000-7f6560dac000 r-xp 00000000 fd:01 5767753 /lib/x86_64-linux-gnu/libreadline.so.6.3 7f6560dac000-7f6560fac000 ---p 0003d000 fd:01 5767753 /lib/x86_64-linux-gnu/libreadline.so.6.3 7f6560fac000-7f6560fae000 r--p 0003d000 fd:01 5767753 /lib/x86_64-linux-gnu/libreadline.so.6.3 7f6560fae000-7f6560fb4000 rw-p 0003f000 fd:01 5767753 /lib/x86_64-linux-gnu/libreadline.so.6.3 7f6560fb4000-7f6560fb5000 rw-p 00000000 00:00 0 7f6560fb5000-7f6561175000 r-xp 00000000 fd:01 5767448 /lib/x86_64-linux-gnu/libc-2.23.so 7f6561175000-7f6561375000 ---p 001c0000 fd:01 5767448 /lib/x86_64-linux-gnu/libc-2.23.so 7f6561375000-7f6561379000 r--p 001c0000 fd:01 5767448 /lib/x86_64-linux-gnu/libc-2.23.so 7f6561379000-7f656137b000 rw-p 001c4000 fd:01 5767448 /lib/x86_64-linux-gnu/libc-2.23.so 7f656137b000-7f656137f000 rw-p 00000000 00:00 0 7f656137f000-7f6561397000 r-xp 00000000 fd:01 1451349 /usr/lib/liblnetconfig.so.4.0.0 7f6561397000-7f6561596000 ---p 00018000 fd:01 1451349 /usr/lib/liblnetconfig.so.4.0.0 7f6561596000-7f6561597000 r--p 00017000 fd:01 1451349 /usr/lib/liblnetconfig.so.4.0.0 7f6561597000-7f6561598000 rw-p 00018000 fd:01 1451349 /usr/lib/liblnetconfig.so.4.0.0 7f6561598000-7f65615a0000 rw-p 
00000000 00:00 0 7f65615a0000-7f65615b9000 r-xp 00000000 fd:01 5767786 /lib/x86_64-linux-gnu/libz.so.1.2.8 7f65615b9000-7f65617b8000 ---p 00019000 fd:01 5767786 /lib/x86_64-linux-gnu/libz.so.1.2.8 7f65617b8000-7f65617b9000 r--p 00018000 fd:01 5767786 /lib/x86_64-linux-gnu/libz.so.1.2.8 7f65617b9000-7f65617ba000 rw-p 00019000 fd:01 5767786 /lib/x86_64-linux-gnu/libz.so.1.2.8 7f65617ba000-7f65617de000 r-xp 00000000 fd:01 1451350 /usr/lib/liblustreapi.so.1.0.0 7f65617de000-7f65619dd000 ---p 00024000 fd:01 1451350 /usr/lib/liblustreapi.so.1.0.0 7f65619dd000-7f65619de000 r--p 00023000 fd:01 1451350 /usr/lib/liblustreapi.so.1.0.0 7f65619de000-7f65619df000 rw-p 00024000 fd:01 1451350 /usr/lib/liblustreapi.so.1.0.0 7f65619df000-7f65619e8000 rw-p 00000000 00:00 0 7f65619e8000-7f6561a0e000 r-xp 00000000 fd:01 5767446 /lib/x86_64-linux-gnu/ld-2.23.so 7f6561bfd000-7f6561c03000 rw-p 00000000 00:00 0 7f6561c0c000-7f6561c0d000 rw-p 00000000 00:00 0 7f6561c0d000-7f6561c0e000 r--p 00025000 fd:01 5767446 /lib/x86_64-linux-gnu/ld-2.23.so 7f6561c0e000-7f6561c0f000 rw-p 00026000 fd:01 5767446 /lib/x86_64-linux-gnu/ld-2.23.so 7f6561c0f000-7f6561c10000 rw-p 00000000 00:00 0 7ffedfb87000-7ffedfba9000 rw-p 00000000 00:00 0 [stack] 7ffedfbef000-7ffedfbf2000 r--p 00000000 00:00 0 [vvar] 7ffedfbf2000-7ffedfbf4000 r-xp 00000000 00:00 0 [vdso] ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall] /usr/lib64/lustre/tests/sanity.sh: line 1429: 5280 Aborted (core dumped) $LFS getstripe -c $DIR/$tdir/$tfile It looks like this issue started on 2018-02-27 16:23:17 UTC. So far, we’ve only seen this while testing Ubuntu 16.04 clients. 
Here are logs for a few failed test suites https://testing.hpdd.intel.com/test_sets/5ae8fc72-287c-11e8-9e0e-52540065bddc https://testing.hpdd.intel.com/test_sets/9f2131ee-2894-11e8-b3c6-52540065bddc https://testing.hpdd.intel.com/test_sets/f530fcd0-1cb5-11e8-a7cd-52540065bddc https://testing.hpdd.intel.com/test_sets/4d231688-2195-11e8-b046-52540065bddc In these test sessions, test 27f, 27i, 27z, 27C, 27F, 56a, 56w, 56wb, 56wc, 56x, 56xa, 56xb, 65i, 102b, 102c, 102d, 102f, 102j, 130a/b/c/d/e, 229, 270a/c/d, 311 fail with a buffer overflow detected in lfs. In these test sessions, we also see the following tests fail with this error: sanity-lfsck 18d, 18e, 18g, 20a, 20b sanityn test 51c sanity-hsm test 11a sanity-flr test 0a, 0b, 0c, 0d, 0e, 0f, 0g, 0h, 1, 2, 4, 5, 32, 32, 36, 37, 38, 39, 40, 41, 42, 43, 45, 200 sanity-dom test 42e, 251, 51c, 51d
|
| Comments |
| Comment by Justin Miller [ 26/Mar/18 ] |
|
I am also seeing a buffer overflow with `lfs getstripe` on a SLES12SP3 client running 2.10.59 # lfs setstripe -c -1 all_ost # lfs getstripe all_ost *** buffer overflow detected ***: lfs terminated ======= Backtrace: ========= /lib64/libc.so.6(+0x721af)[0x7f0dbfecb1af] /lib64/libc.so.6(__fortify_fail+0x37)[0x7f0dbff52dc7] /lib64/libc.so.6(+0xf8050)[0x7f0dbff51050] /lib64/libc.so.6(+0xf77bb)[0x7f0dbff507bb] /lib64/libc.so.6(__snprintf_chk+0x78)[0x7f0dbff506d8] /usr/lib64/liblustreapi.so.1(+0xfce4)[0x7f0dc0b5ece4] /usr/lib64/liblustreapi.so.1(+0x7e7c)[0x7f0dc0b56e7c] /usr/lib64/liblustreapi.so.1(+0xa354)[0x7f0dc0b59354] lfs[0x413958] /usr/lib64/liblustreapi.so.1(Parser_execarg+0x51)[0x7f0dc0b69511] lfs[0x40478e] /lib64/libc.so.6(__libc_start_main+0xf5)[0x7f0dbfe796e5] lfs[0x404809] ======= Memory map: ======== 00400000-00421000 r-xp 00000000 07:02 8959 /usr/bin/lfs 00620000-00621000 r--p 00020000 07:02 8959 /usr/bin/lfs 00621000-00622000 rw-p 00021000 07:02 8959 /usr/bin/lfs 00622000-00643000 rw-p 00000000 00:00 0 [heap] 7f0dbfc42000-7f0dbfc58000 r-xp 00000000 07:02 3849 /lib64/libgcc_s.so.1 7f0dbfc58000-7f0dbfe57000 ---p 00016000 07:02 3849 /lib64/libgcc_s.so.1 7f0dbfe57000-7f0dbfe58000 r--p 00015000 07:02 3849 /lib64/libgcc_s.so.1 7f0dbfe58000-7f0dbfe59000 rw-p 00016000 07:02 3849 /lib64/libgcc_s.so.1 7f0dbfe59000-7f0dbfff2000 r-xp 00000000 00:17 49292 /lib64/libc-2.22.so 7f0dbfff2000-7f0dc01f2000 ---p 00199000 00:17 49292 /lib64/libc-2.22.so 7f0dc01f2000-7f0dc01f6000 r--p 00199000 00:17 49292 /lib64/libc-2.22.so 7f0dc01f6000-7f0dc01f8000 rw-p 0019d000 00:17 49292 /lib64/libc-2.22.so 7f0dc01f8000-7f0dc01fc000 rw-p 00000000 00:00 0 7f0dc01fc000-7f0dc02f8000 r-xp 00000000 00:17 64019 /lib64/libm-2.22.so 7f0dc02f8000-7f0dc04f7000 ---p 000fc000 00:17 64019 /lib64/libm-2.22.so 7f0dc04f7000-7f0dc04f8000 r--p 000fb000 00:17 64019 /lib64/libm-2.22.so 7f0dc04f8000-7f0dc04f9000 rw-p 000fc000 00:17 64019 /lib64/libm-2.22.so 7f0dc04f9000-7f0dc0518000 r-xp 00000000 07:02 47076 
/usr/lib64/libyaml-0.so.2.0.4 7f0dc0518000-7f0dc0717000 ---p 0001f000 07:02 47076 /usr/lib64/libyaml-0.so.2.0.4 7f0dc0717000-7f0dc0718000 r--p 0001e000 07:02 47076 /usr/lib64/libyaml-0.so.2.0.4 7f0dc0718000-7f0dc0719000 rw-p 0001f000 07:02 47076 /usr/lib64/libyaml-0.so.2.0.4 7f0dc0719000-7f0dc0730000 r-xp 00000000 07:02 46012 /usr/lib64/liblnetconfig.so.4.0.0 7f0dc0730000-7f0dc092f000 ---p 00017000 07:02 46012 /usr/lib64/liblnetconfig.so.4.0.0 7f0dc092f000-7f0dc0930000 r--p 00016000 07:02 46012 /usr/lib64/liblnetconfig.so.4.0.0 7f0dc0930000-7f0dc0931000 rw-p 00017000 07:02 46012 /usr/lib64/liblnetconfig.so.4.0.0 7f0dc0931000-7f0dc0939000 rw-p 00000000 00:00 0 7f0dc0939000-7f0dc094e000 r-xp 00000000 00:17 64023 /lib64/libz.so.1.2.8 7f0dc094e000-7f0dc0b4d000 ---p 00015000 00:17 64023 /lib64/libz.so.1.2.8 7f0dc0b4d000-7f0dc0b4e000 r--p 00014000 00:17 64023 /lib64/libz.so.1.2.8 7f0dc0b4e000-7f0dc0b4f000 rw-p 00015000 00:17 64023 /lib64/libz.so.1.2.8 7f0dc0b4f000-7f0dc0b72000 r-xp 00000000 07:02 46051 /usr/lib64/liblustreapi.so.1.0.0 7f0dc0b72000-7f0dc0d71000 ---p 00023000 07:02 46051 /usr/lib64/liblustreapi.so.1.0.0 7f0dc0d71000-7f0dc0d72000 r--p 00022000 07:02 46051 /usr/lib64/liblustreapi.so.1.0.0 7f0dc0d72000-7f0dc0d73000 rw-p 00023000 07:02 46051 /usr/lib64/liblustreapi.so.1.0.0 7f0dc0d73000-7f0dc0d7c000 rw-p 00000000 00:00 0 7f0dc0d7c000-7f0dc0d9d000 r-xp 00000000 00:17 63705 /lib64/ld-2.22.so 7f0dc0f3e000-7f0dc0f43000 rw-p 00000000 00:00 0 7f0dc0f9a000-7f0dc0f9c000 rw-p 00000000 00:00 0 7f0dc0f9c000-7f0dc0f9d000 r--p 00020000 00:17 63705 /lib64/ld-2.22.so 7f0dc0f9d000-7f0dc0f9e000 rw-p 00021000 00:17 63705 /lib64/ld-2.22.so 7f0dc0f9e000-7f0dc0f9f000 rw-p 00000000 00:00 0 7ffdfacaf000-7ffdfacd7000 rw-p 00000000 00:00 0 [stack] 7ffdface7000-7ffdfacea000 r--p 00000000 00:00 0 [vvar] 7ffdfacea000-7ffdfacec000 r-xp 00000000 00:00 0 [vdso] ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall] Aborted |
| Comment by Andreas Dilger [ 26/Mar/18 ] |
|
jmiller, are you able to run this under gdb or with ltrace to get proper debugging symbols (may need -debug RPM installed)? That should make it more obvious what is going wrong, and hopefully get a fix more quickly. |
| Comment by Patrick Farrell (Inactive) [ 28/Mar/18 ] |
|
Thanks for the suggestion, Andreas. Justin and I dug into it using gdb + glibc debug, and I see what's wrong in cb_getstripe. It is unpleasantly subtle. snprintf((char *)&param->fp_lmd->lmd_lmm, param->fp_lum_size, "%s", fname); In this snprintf command, fp_lum_size is 4097 (path_max + 1). lmd_lmm is a struct 32 bytes in size. The snprintf check code in glibc checks this struct size against the string max passed in to snprintf, and, understandably, fails. The actual code works because fp_lmd is defined as: struct lov_user_mds_data_v1 {
lstat_t lmd_st; /* MDS stat struct */
struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
} __attribute__((packed));
(or _v3 - for this case, it doesn't matter which) And the allocation for it uses: for the size (fp_lum_size is the same value as earlier - 4097) So there is a buffer of sufficient size, and we're writing into the middle of it. So the code will operate correctly, but the runtime check is understandably angry. We're writing to the address of a struct and using it as an arbitrary buffer. We should be explicit about the buffer. I see two ways to make this saner, either of which would - I think? - placate the fortify checks: 1. ((char *)&param->fp_lmd) + offsetof(struct lov_user_mds_data.lmd_lmm)) 2. Make a union in lov_user_mds_data_v{1,3}, roughly like this: struct lov_user_mds_data_v1 {
lstat_t lmd_st; /* MDS stat struct */
union {
struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
char *path; /* Explicit pointer for file path */
};
} __attribute__((packed));
Then snprintf(param->fp_lmd->path, param->fp_lum_size, "%s", fname);
The second seems better to me. Note I haven't tried either of these. |
| Comment by Patrick Farrell (Inactive) [ 28/Mar/18 ] |
|
Got my pointer magic wrong, this looks to be correct. (This problem can be reproduced on rhel7 if you use -D_FORTIFY_SOURCE) diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 59956dc..56e8b4a 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -676,12 +676,18 @@ static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic) #define lov_user_mds_data lov_user_mds_data_v1 struct lov_user_mds_data_v1 { lstat_t lmd_st; /* MDS stat struct */ - struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */ + union { + struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */ + char path[0]; /* Explicit pointer for file path */ + }; } __attribute__((packed)); struct lov_user_mds_data_v3 { lstat_t lmd_st; /* MDS stat struct */ - struct lov_user_md_v3 lmd_lmm; /* LOV EA V3 user data */ + union { + struct lov_user_md_v3 lmd_lmm; /* LOV EA V1 user data */ + char path[0]; /* Explicit pointer for file path */ + }; } __attribute__((packed)); #endif diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 2c23a1a501..f46674ab8d 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -4507,8 +4507,7 @@ static int cb_getstripe(char *path, DIR *parent, DIR **dirp, void *data, char *fname = strrchr(path, '/'); fname = (fname == NULL ? path : fname + 1); - snprintf((char *)&param->fp_lmd->lmd_lmm, param->fp_lum_size, - "%s", fname); + snprintf(param->fp_lmd->path, param->fp_lum_size, "%s", fname); ret = ioctl(dirfd(parent), IOC_MDC_GETFILESTRIPE, (void *)&param->fp_lmd->lmd_lmm);
|
| Comment by Patrick Farrell (Inactive) [ 28/Mar/18 ] |
| Comment by Peter Jones [ 28/Mar/18 ] |
|
There was a typo in the Jira reference in the commit message which was why this was not added automatically |
| Comment by Patrick Farrell (Inactive) [ 28/Mar/18 ] |
|
Ah, oops. |
| Comment by Gerrit Updater [ 28/Mar/18 ] |
|
Andreas Dilger (andreas.dilger@intel.com) uploaded a new patch: https://review.whamcloud.com/31822 |
| Comment by Gerrit Updater [ 30/Mar/18 ] |
|
John L. Hammond (john.hammond@intel.com) uploaded a new patch: https://review.whamcloud.com/31838 |
| Comment by Gerrit Updater [ 30/Mar/18 ] |
|
Oleg Drokin (oleg.drokin@intel.com) merged in patch https://review.whamcloud.com/31822/ |
| Comment by Andreas Dilger [ 30/Mar/18 ] |
|
The spurious buffer overflow detections have been resolved. |