[LU-15651] LFSCK can assign incompatible PFL stripes to a PLAIN layout Created: 14/Mar/22  Updated: 14/Mar/22

Status: Open
Project: Lustre
Component/s: None
Affects Version/s: None
Fix Version/s: None

Type: Bug Priority: Critical
Reporter: Etienne Aujames Assignee: Etienne Aujames
Resolution: Unresolved Votes: 0
Labels: None
Environment:

VMs + lustre 2.15 ldiskfs


Attachments: HTML File layouts     File lfsck_orphan1.log     HTML File lfsck_orphan1_report     File lfsck_orphan2.log     HTML File lfsck_orphan2_report     File lfsck_remount.log     HTML File lfsck_remount_report     File lfsck_snap.log     HTML File lfsck_snap_report    
Issue Links:
Related
Severity: 3
Rank (Obsolete): 9223372036854775807

 Description   

This corruption appeared in production after manually applying some layout corrections for CEA-5/LU-13535/LU-14837. This left a file with stripes from different layout versions (PFL and plain).

Here is a reproducer:

  1. create a file with plain layout:
  2. take an LVM snapshot of the MDT
  3. migrate the file to PFL layout
  4. mount the snapshot
  5. run LFSCK to regenerate old plain stripes
  6. mount the real target
  7. run lfsck to regenerate the PFL stripes
  8. run lfsck in orphan mode -> file layout is corrupted (layout is modified to plain), OST stripes are not removed
  9. re-run lfsck in orphan mode -> file layout is corrupted, some PFL stripes are associated with the file's plain layout.

Here are the details:

  1. create a file with plain layout:
    lmm_stripe_count:  2
    lmm_stripe_size:   1048576
    lmm_pattern:       raid0
    lmm_layout_gen:    0
    lmm_stripe_offset: 0
            obdidx           objid           objid           group
                 0               2            0x2                0
                 1               2            0x2                0
    
  2. take an LVM snapshot of the MDT:
    [root@mds1 ~]# lvcreate -L 500M -s -n mdt1_snap /dev/VGLustre/mdt1
    
  3. migrate the file to PFL layout
    [root@client client]# lfs migrate -E1M -c1 -E-1 -c2 -S4M test
    [root@client client]# lfs getstripe test
    test
      lcm_layout_gen:    4
      lcm_mirror_count:  1
      lcm_entry_count:   2
        lcme_id:             1
        lcme_mirror_id:      0
        lcme_flags:          init
        lcme_extent.e_start: 0
        lcme_extent.e_end:   1048576
          lmm_stripe_count:  1
          lmm_stripe_size:   1048576
          lmm_pattern:       raid0
          lmm_layout_gen:    0
          lmm_stripe_offset: 0
          lmm_objects:
          - 0: { l_ost_idx: 0, l_fid: [0x100000000:0x3:0x0] }
    
        lcme_id:             2
        lcme_mirror_id:      0
        lcme_flags:          init
        lcme_extent.e_start: 1048576
        lcme_extent.e_end:   EOF
          lmm_stripe_count:  2
          lmm_stripe_size:   4194304
          lmm_pattern:       raid0
          lmm_layout_gen:    0
          lmm_stripe_offset: 1
          lmm_objects:
          - 0: { l_ost_idx: 1, l_fid: [0x100010000:0x3:0x0] }
          - 1: { l_ost_idx: 0, l_fid: [0x100000000:0x4:0x0] }
    
  4. mount the snapshot
    [root@mds1 ~]# umount /media/lustrefs/client-mds1
    [root@mds1 ~]#  mount -tlustre /dev/mapper/VGLustre-mdt1_snap /media/lustrefs/client-mds1/
    
    [root@client client]# ll
    ls: cannot access test: No such file or directory
    total 0
    -????????? ? ? ? ?            ? test
    
  5. run LFSCK to regenerate old plain stripes
    [root@mds1 ~]# lctl clear; lctl lfsck_start -A -tall -C -c -r
    [root@client client]# lfs getstripe test
    test
    lmm_stripe_count:  2
    lmm_stripe_size:   1048576
    lmm_pattern:       raid0
    lmm_layout_gen:    0
    lmm_stripe_offset: 0
            obdidx           objid           objid           group
                 0               2            0x2                0
                 1               2            0x2                0
    
  6. mount the real target
    [root@mds1 ~]# umount /media/lustrefs/client-mds1
    [root@mds1 ~]#  mount -tlustre /dev/mapper/mds1_flakey /media/lustrefs/client-mds1/
    [root@client client]# ll
    ls: cannot access test: No such file or directory
    total 0
    -????????? ? ? ? ?            ? test
    
  7. run lfsck to regenerate the PFL stripes
    [root@mds1 ~]# lctl clear; lctl lfsck_start -A -tall -c -C -r
    
    [root@client client]# ll
    total 0
    -rw-r--r--. 1 root root 0 Mar 14 16:57 test
    [root@client client]# lfs getstripe test
    test
      lcm_layout_gen:    4
      lcm_mirror_count:  1
      lcm_entry_count:   2
        lcme_id:             1
        lcme_mirror_id:      0
        lcme_flags:          init
        lcme_extent.e_start: 0
        lcme_extent.e_end:   1048576
          lmm_stripe_count:  1
          lmm_stripe_size:   1048576
          lmm_pattern:       raid0
          lmm_layout_gen:    0
          lmm_stripe_offset: 0
          lmm_objects:
          - 0: { l_ost_idx: 0, l_fid: [0x100000000:0x3:0x0] }
    
        lcme_id:             2
        lcme_mirror_id:      0
        lcme_flags:          init
        lcme_extent.e_start: 1048576
        lcme_extent.e_end:   EOF
          lmm_stripe_count:  2
          lmm_stripe_size:   4194304
          lmm_pattern:       raid0
          lmm_layout_gen:    0
          lmm_stripe_offset: 1
          lmm_objects:
          - 0: { l_ost_idx: 1, l_fid: [0x100010000:0x3:0x0] }
          - 1: { l_ost_idx: 0, l_fid: [0x100000000:0x4:0x0] }
    
  8. run lfsck in orphan mode -> file layout is corrupted (layout is modified to plain), OST stripes are not removed
    [root@mds1 ~]# lctl clear; lctl lfsck_start -A -tall -o -r
    
    [root@client client]# lfs getstripe test
    test
    lmm_stripe_count:  2
    lmm_stripe_size:   1048576
    lmm_pattern:       40000001
    lmm_layout_gen:    1
    lmm_stripe_offset: 0
            obdidx           objid           objid           group
                 0               2            0x2                0
                 0               0              0                0
    
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((2%32))/2" /dev/mapper/ost1_flakey
      lma: fid=[0x100000000:0x2:0x0] compat=8 incompat=0
      fid: parent=[0x200000402:0x1:0x0] stripe=0 stripe_size=1048576 stripe_count=2 layout_version=0 range=0
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((2%32))/2" /dev/mapper/ost2_flakey
      lma: fid=[0x100010000:0x2:0x0] compat=8 incompat=0
      fid: parent=[0x200000402:0x1:0x0] stripe=1 stripe_size=1048576 stripe_count=2 layout_version=0 range=0
    
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((3%32))/3" /dev/mapper/ost1_flakey
      lma: fid=[0x100000000:0x3:0x0] compat=8 incompat=0
      fid: parent=[0x200000402:0x1:0x0] stripe=0 stripe_size=1048576 stripe_count=1 component_id=1 component_start=0 component_end=1048576 layout_version=0 range=0
    
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((3%32))/3" /dev/mapper/ost2_flakey
      lma: fid=[0x100010000:0x3:0x0] compat=8 incompat=0
      fid: parent=[0x200000402:0x1:0x0] stripe=0 stripe_size=4194304 stripe_count=2 component_id=2 component_start=1048576 component_end=18446744073709551615 layout_version=0 range=0
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((4%32))/4" /dev/mapper/ost1_flakey
      lma: fid=[0x100000000:0x4:0x0] compat=8 incompat=0
      fid: parent=[0x200000402:0x1:0x0] stripe=1 stripe_size=4194304 stripe_count=2 component_id=2 component_start=1048576 component_end=18446744073709551615 layout_version=0 range=0
    
  9. re-run lfsck in orphan mode -> file layout is corrupted, some PFL stripes are associated with the file's plain layout.
    [root@mds1 ~]# lctl clear; lctl lfsck_start -A -tall -o -r
    
    [root@client client]# lfs getstripe test
    test
    lmm_stripe_count:  2
    lmm_stripe_size:   1048576
    lmm_pattern:       raid0
    lmm_layout_gen:    5
    lmm_stripe_offset: 0
            obdidx           objid           objid           group
                 0               3            0x3                0
                 0               4            0x4                0
    
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((3%32))/3" /dev/mapper/ost1_flakey
      lma: fid=[0x100000000:0x3:0x0] compat=8 incompat=0
      fid: parent=[0x200000402:0x1:0x0] stripe=0 stripe_size=1048576 stripe_count=1 component_id=1 component_start=0 component_end=1048576 layout_version=0 range=0
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((4%32))/4" /dev/mapper/ost1_flakey
      lma: fid=[0x100000000:0x4:0x0] compat=8 incompat=0
      fid: parent=[0x200000402:0x1:0x0] stripe=1 stripe_size=4194304 stripe_count=2 component_id=2 component_start=1048576 component_end=18446744073709551615 layout_version=0 range=0
    
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((3%32))/3" /dev/mapper/ost2_flakey
    O/0/d3/3: File not found by ext2_lookup
    
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((2%32))/2" /dev/mapper/ost1_flakey
    O/0/d2/2: File not found by ext2_lookup
    [root@oss ~]# debugfs -c -R "ea_list O/0/d$((2%32))/2" /dev/mapper/ost2_flakey
    O/0/d2/2: File not found by ext2_lookup
    


 Comments   
Comment by Etienne Aujames [ 14/Mar/22 ]

@pjones, I don't think I am competent enough to propose a fix for this: I hardly know the design of LFSCK.

Generated at Sat Feb 10 03:20:09 UTC 2024 using Jira 9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c.