Uploaded image for project: 'Lustre'
  1. Lustre
  2. LU-14464

Auto restripe triggers for no apparent reason

    XMLWordPrintable

Details

    • Bug
    • Resolution: Unresolved
    • Minor
    • None
    • Lustre 2.14.0
    • None
    • Lustre-2.14.0
    • 3
    • 9223372036854775807

    Description

      DNE auto restripe can currently be triggered by "stat <dir>", but that behavior is inconsistent. The current settings, with 4 MDTs, are shown below.

      [root@es400nvx1-vm1 ~]# clush -a lctl get_param mdt.*.enable_dir_restripe mdt.*.enable_dir_auto_split mdt.*.dir_split_count mdt.*.dir_split_delta mdt.*.dir_restripe_nsonly lod.*.mdt_hash | dshbak
      ----------------
      es400nvx1-vm1
      ----------------
      mdt.ai400x-MDT0000.enable_dir_restripe=0
      mdt.ai400x-MDT0000.enable_dir_auto_split=1
      mdt.ai400x-MDT0000.dir_split_count=50000
      mdt.ai400x-MDT0000.dir_split_delta=4
      mdt.ai400x-MDT0000.dir_restripe_nsonly=0
      lod.ai400x-MDT0000-mdtlov.mdt_hash=fnv_1a_64
      ----------------
      es400nvx1-vm2
      ----------------
      mdt.ai400x-MDT0001.enable_dir_restripe=0
      mdt.ai400x-MDT0001.enable_dir_auto_split=1
      mdt.ai400x-MDT0001.dir_split_count=50000
      mdt.ai400x-MDT0001.dir_split_delta=4
      mdt.ai400x-MDT0001.dir_restripe_nsonly=0
      lod.ai400x-MDT0001-mdtlov.mdt_hash=fnv_1a_64
      ----------------
      es400nvx1-vm3
      ----------------
      mdt.ai400x-MDT0002.enable_dir_restripe=0
      mdt.ai400x-MDT0002.enable_dir_auto_split=1
      mdt.ai400x-MDT0002.dir_split_count=50000
      mdt.ai400x-MDT0002.dir_split_delta=4
      mdt.ai400x-MDT0002.dir_restripe_nsonly=0
      lod.ai400x-MDT0002-mdtlov.mdt_hash=fnv_1a_64
      ----------------
      es400nvx1-vm4
      ----------------
      mdt.ai400x-MDT0003.enable_dir_restripe=0
      mdt.ai400x-MDT0003.enable_dir_auto_split=1
      mdt.ai400x-MDT0003.dir_split_count=50000
      mdt.ai400x-MDT0003.dir_split_delta=4
      mdt.ai400x-MDT0003.dir_restripe_nsonly=0
      lod.ai400x-MDT0003-mdtlov.mdt_hash=fnv_1a_64
      

      Create 1M files and wait 180 sec.

      [root@ec01 ~]# mkdir /ai400x/testdir
      [root@ec01 ~]# salloc -p 40n -N 32 --ntasks-per-node=16  mpirun -mca btl_openib_if_include mlx5_1:1 -x UCX_NET_DEVICES=mlx5_1:1 --bind-to core:overload-allowed --allow-run-as-root /work/tools/bin/mdtest -n 2000 -F -v -d /ai400x/testdir/ -C
      [root@ec01 ~]# sleep 180
      [root@ec01 ~]# lfs df -i
      UUID                      Inodes       IUsed       IFree IUse% Mounted on
      ai400x-MDT0000_UUID     83050496     1024295    82026201   2% /ai400x[MDT:0] 
      ai400x-MDT0001_UUID     83050496         278    83050218   1% /ai400x[MDT:1] 
      ai400x-MDT0002_UUID     83050496         279    83050217   1% /ai400x[MDT:2] 
      ai400x-MDT0003_UUID     83050496         277    83050219   1% /ai400x[MDT:3] 
      ai400x-OST0000_UUID     55574528      135099    55439429   1% /ai400x[OST:0] 
      ai400x-OST0001_UUID     55574528      138358    55436170   1% /ai400x[OST:1] 
      ai400x-OST0002_UUID     55574528      135957    55438571   1% /ai400x[OST:2] 
      ai400x-OST0003_UUID     55574528      135785    55438743   1% /ai400x[OST:3] 
      ai400x-OST0004_UUID     55574528      135259    55439269   1% /ai400x[OST:4] 
      ai400x-OST0005_UUID     55574528      133512    55441016   1% /ai400x[OST:5] 
      ai400x-OST0006_UUID     55574528      137974    55436554   1% /ai400x[OST:6] 
      ai400x-OST0007_UUID     55574528      135153    55439375   1% /ai400x[OST:7] 
      
      filesystem_summary:    332201984     1025129   331176855   1% /ai400x
      

      No auto restripe was triggered, which is the expected behavior since there was no "stat" call on the directory after file creation.

      Create 2M files; the behavior is the same as in the 1M-file creation case.

      [root@ec01 ~]# rm -rf /ai400x/testdir
      [root@ec01 ~]# mkdir /ai400x/testdir
      [root@ec01 ~]# salloc -p 40n -N 32 --ntasks-per-node=16  mpirun -mca btl_openib_if_include mlx5_1:1 -x UCX_NET_DEVICES=mlx5_1:1 --bind-to core:overload-allowed --allow-run-as-root /work/tools/bin/mdtest -n 4000 -F -v -d /ai400x/testdir/ -C
      [root@ec01 ~]# sleep 180
      [root@ec01 ~]# lfs df -i
      UUID                      Inodes       IUsed       IFree IUse% Mounted on
      ai400x-MDT0000_UUID     83050496     2048295    81002201   3% /ai400x[MDT:0] 
      ai400x-MDT0001_UUID     83050496         278    83050218   1% /ai400x[MDT:1] 
      ai400x-MDT0002_UUID     83050496         279    83050217   1% /ai400x[MDT:2] 
      ai400x-MDT0003_UUID     83050496         277    83050219   1% /ai400x[MDT:3] 
      ai400x-OST0000_UUID     55574528      267098    55307430   1% /ai400x[OST:0] 
      ai400x-OST0001_UUID     55574528      270359    55304169   1% /ai400x[OST:1] 
      ai400x-OST0002_UUID     55574528      261908    55312620   1% /ai400x[OST:2] 
      ai400x-OST0003_UUID     55574528      261738    55312790   1% /ai400x[OST:3] 
      ai400x-OST0004_UUID     55574528      261210    55313318   1% /ai400x[OST:4] 
      ai400x-OST0005_UUID     55574528      259465    55315063   1% /ai400x[OST:5] 
      ai400x-OST0006_UUID     55574528      263927    55310601   1% /ai400x[OST:6] 
      ai400x-OST0007_UUID     55574528      261104    55313424   1% /ai400x[OST:7] 
      
      filesystem_summary:    332201984     2049129   330152855   1% /ai400x
      

      However, if more than 4M files are created, restripe is somehow triggered without any "stat" call.

      [root@ec01 ~]# rm -rf /ai400x/testdir
      [root@ec01 ~]# mkdir /ai400x/testdir
      [root@ec01 ~]# salloc -p 40n -N 32 --ntasks-per-node=16  mpirun -mca btl_openib_if_include mlx5_1:1 -x UCX_NET_DEVICES=mlx5_1:1 --bind-to core:overload-allowed --allow-run-as-root /work/tools/bin/mdtest -n 8000 -F -v -d /ai400x/testdir/ -C
      [root@ec01 ~]# sleep 180
      [root@ec01 ~]# lfs df -i
      UUID                      Inodes       IUsed       IFree IUse% Mounted on
      ai400x-MDT0000_UUID     83050496     3878426    79172070   5% /ai400x[MDT:0] 
      ai400x-MDT0001_UUID     83050496       72742    82977754   1% /ai400x[MDT:1] 
      ai400x-MDT0002_UUID     83050496       73048    82977448   1% /ai400x[MDT:2] 
      ai400x-MDT0003_UUID     83050496       72919    82977577   1% /ai400x[MDT:3] 
      ai400x-OST0000_UUID     55574528      527738    55046790   1% /ai400x[OST:0] 
      ai400x-OST0001_UUID     55574528      531255    55043273   1% /ai400x[OST:1] 
      ai400x-OST0002_UUID     55574528      526484    55048044   1% /ai400x[OST:2] 
      ai400x-OST0003_UUID     55574528      526186    55048342   1% /ai400x[OST:3] 
      ai400x-OST0004_UUID     55574528      525658    55048870   1% /ai400x[OST:4] 
      ai400x-OST0005_UUID     55574528      517896    55056632   1% /ai400x[OST:5] 
      ai400x-OST0006_UUID     55574528      518295    55056233   1% /ai400x[OST:6] 
      ai400x-OST0007_UUID     55574528      525680    55048848   1% /ai400x[OST:7] 
      
      filesystem_summary:    332201984     4097135   328104849   2% /ai400x
      

      Attachments

        Issue Links

          Activity

            People

              wc-triage WC Triage
              sihara Shuichi Ihara
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

                Created:
                Updated: