Details
-
Bug
-
Resolution: Unresolved
-
Critical
-
None
-
None
-
None
-
VMs + lustre 2.15 ldiskfs
-
3
-
9223372036854775807
Description
This corruption appeared in production after manually applying some layout corrections for CEA-5/LU-13535/LU-14837. As a result, a single file ended up with two different versions of its stripes (PFL and plain).
Here is a reproducer:
- create a file with plain layout:
- take an LVM snapshot of the MDT
- migrate the file to PFL layout
- mount the snapshot
- run LFSCK to regenerate old plain stripes
- mount the real target
- run lfsck to regenerate the PFL stripes
- run lfsck in orphan mode -> file layout is corrupted (layout is modified to plain), OST stripes are not removed
- re-run lfsck in orphan mode -> file layout is corrupted, some PFL stripes are associated with the file's plain layout.
Here are the details:
- create a file with plain layout:
lmm_stripe_count: 2
lmm_stripe_size: 1048576
lmm_pattern: raid0
lmm_layout_gen: 0
lmm_stripe_offset: 0
     obdidx       objid       objid       group
          0           2         0x2           0
          1           2         0x2           0
- take an LVM snapshot of the MDT:
[root@mds1 ~]# lvcreate -L 500M -s -n mdt1_snap /dev/VGLustre/mdt1
- migrate the file to PFL layout
[root@client client]# lfs migrate -E1M -c1 -E-1 -c2 -S4M test
[root@client client]# lfs getstripe test
test
  lcm_layout_gen: 4
  lcm_mirror_count: 1
  lcm_entry_count: 2
    lcme_id: 1
    lcme_mirror_id: 0
    lcme_flags: init
    lcme_extent.e_start: 0
    lcme_extent.e_end: 1048576
      lmm_stripe_count: 1
      lmm_stripe_size: 1048576
      lmm_pattern: raid0
      lmm_layout_gen: 0
      lmm_stripe_offset: 0
      lmm_objects:
      - 0: { l_ost_idx: 0, l_fid: [0x100000000:0x3:0x0] }
    lcme_id: 2
    lcme_mirror_id: 0
    lcme_flags: init
    lcme_extent.e_start: 1048576
    lcme_extent.e_end: EOF
      lmm_stripe_count: 2
      lmm_stripe_size: 4194304
      lmm_pattern: raid0
      lmm_layout_gen: 0
      lmm_stripe_offset: 1
      lmm_objects:
      - 0: { l_ost_idx: 1, l_fid: [0x100010000:0x3:0x0] }
      - 1: { l_ost_idx: 0, l_fid: [0x100000000:0x4:0x0] }
- mount the snapshot
[root@mds1 ~]# umount /media/lustrefs/client-mds1
[root@mds1 ~]# mount -tlustre /dev/mapper/VGLustre-mdt1_snap /media/lustrefs/client-mds1/
[root@client client]# ll
ls: cannot access test: No such file or directory
total 0
-????????? ? ? ? ? ? test
- run LFSCK to regenerate old plain stripes
[root@mds1 ~]# lctl clear; lctl lfsck_start -A -tall -C -c -r
[root@client client]# lfs getstripe test
test
lmm_stripe_count: 2
lmm_stripe_size: 1048576
lmm_pattern: raid0
lmm_layout_gen: 0
lmm_stripe_offset: 0
     obdidx       objid       objid       group
          0           2         0x2           0
          1           2         0x2           0
- mount the real target
[root@mds1 ~]# umount /media/lustrefs/client-mds1
[root@mds1 ~]# mount -tlustre /dev/mapper/mds1_flakey /media/lustrefs/client-mds1/
[root@client client]# ll
ls: cannot access test: No such file or directory
total 0
-????????? ? ? ? ? ? test
- run lfsck to regenerate the PFL stripes
[root@mds1 ~]# lctl clear; lctl lfsck_start -A -tall -c -C -r
[root@client client]# ll
total 0
-rw-r--r--. 1 root root 0 Mar 14 16:57 test
[root@client client]# lfs getstripe test
test
  lcm_layout_gen: 4
  lcm_mirror_count: 1
  lcm_entry_count: 2
    lcme_id: 1
    lcme_mirror_id: 0
    lcme_flags: init
    lcme_extent.e_start: 0
    lcme_extent.e_end: 1048576
      lmm_stripe_count: 1
      lmm_stripe_size: 1048576
      lmm_pattern: raid0
      lmm_layout_gen: 0
      lmm_stripe_offset: 0
      lmm_objects:
      - 0: { l_ost_idx: 0, l_fid: [0x100000000:0x3:0x0] }
    lcme_id: 2
    lcme_mirror_id: 0
    lcme_flags: init
    lcme_extent.e_start: 1048576
    lcme_extent.e_end: EOF
      lmm_stripe_count: 2
      lmm_stripe_size: 4194304
      lmm_pattern: raid0
      lmm_layout_gen: 0
      lmm_stripe_offset: 1
      lmm_objects:
      - 0: { l_ost_idx: 1, l_fid: [0x100010000:0x3:0x0] }
      - 1: { l_ost_idx: 0, l_fid: [0x100000000:0x4:0x0] }
- run lfsck in orphan mode -> file layout is corrupted (layout is modified to plain), OST stripes are not removed
[root@mds1 ~]# lctl clear; lctl lfsck_start -A -tall -o -r
[root@client client]# lfs getstripe test
test
lmm_stripe_count: 2
lmm_stripe_size: 1048576
lmm_pattern: 40000001
lmm_layout_gen: 1
lmm_stripe_offset: 0
     obdidx       objid       objid       group
          0           2         0x2           0
          0           0           0           0
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((2%32))/2" /dev/mapper/ost1_flakey
  lma: fid=[0x100000000:0x2:0x0] compat=8 incompat=0
  fid: parent=[0x200000402:0x1:0x0] stripe=0 stripe_size=1048576 stripe_count=2 layout_version=0 range=0
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((2%32))/2" /dev/mapper/ost2_flakey
  lma: fid=[0x100010000:0x2:0x0] compat=8 incompat=0
  fid: parent=[0x200000402:0x1:0x0] stripe=1 stripe_size=1048576 stripe_count=2 layout_version=0 range=0
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((3%32))/3" /dev/mapper/ost1_flakey
  lma: fid=[0x100000000:0x3:0x0] compat=8 incompat=0
  fid: parent=[0x200000402:0x1:0x0] stripe=0 stripe_size=1048576 stripe_count=1 component_id=1 component_start=0 component_end=1048576 layout_version=0 range=0
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((3%32))/3" /dev/mapper/ost2_flakey
  lma: fid=[0x100010000:0x3:0x0] compat=8 incompat=0
  fid: parent=[0x200000402:0x1:0x0] stripe=0 stripe_size=4194304 stripe_count=2 component_id=2 component_start=1048576 component_end=18446744073709551615 layout_version=0 range=0
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((4%32))/4" /dev/mapper/ost1_flakey
  lma: fid=[0x100000000:0x4:0x0] compat=8 incompat=0
  fid: parent=[0x200000402:0x1:0x0] stripe=1 stripe_size=4194304 stripe_count=2 component_id=2 component_start=1048576 component_end=18446744073709551615 layout_version=0 range=0
- re-run lfsck in orphan mode -> file layout is corrupted, some PFL stripes are associated with the file's plain layout.
[root@mds1 ~]# lctl clear; lctl lfsck_start -A -tall -o -r
[root@client client]# lfs getstripe test
test
lmm_stripe_count: 2
lmm_stripe_size: 1048576
lmm_pattern: raid0
lmm_layout_gen: 5
lmm_stripe_offset: 0
     obdidx       objid       objid       group
          0           3         0x3           0
          0           4         0x4           0
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((3%32))/3" /dev/mapper/ost1_flakey
  lma: fid=[0x100000000:0x3:0x0] compat=8 incompat=0
  fid: parent=[0x200000402:0x1:0x0] stripe=0 stripe_size=1048576 stripe_count=1 component_id=1 component_start=0 component_end=1048576 layout_version=0 range=0
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((4%32))/4" /dev/mapper/ost1_flakey
  lma: fid=[0x100000000:0x4:0x0] compat=8 incompat=0
  fid: parent=[0x200000402:0x1:0x0] stripe=1 stripe_size=4194304 stripe_count=2 component_id=2 component_start=1048576 component_end=18446744073709551615 layout_version=0 range=0
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((3%32))/3" /dev/mapper/ost2_flakey
O/0/d3/3: File not found by ext2_lookup
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((2%32))/2" /dev/mapper/ost1_flakey
O/0/d2/2: File not found by ext2_lookup
[root@oss ~]# debugfs -c -R "ea_list O/0/d$((2%32))/2" /dev/mapper/ost2_flakey
O/0/d2/2: File not found by ext2_lookup