Uploaded image for project: 'Lustre'
  1. Lustre
  2. LU-15412

sanity test 807 stuck on multi-nodes setup when CLIENTS is empty

    XMLWordPrintable

Details

    • Bug
    • Resolution: Fixed
    • Minor
    • Lustre 2.16.0, Lustre 2.15.3
    • None
    • None
    • Multi-node Lustre test environment.
    • 3
    • 9223372036854775807

    Description

      Sanity test 807 gets stuck in a multi-node test environment when CLIENTS is empty.

      [root@lustre-aio tests]# PTLDEBUG=-1  ./auster  -vr sanity --only 807
      2 ...
      3
      4 == sanity test 807: verify LSOM syncing tool ============= 02:19:11 (1641349151)
      5 mdd.lustre-MDT0000.changelog_mask=+hsm
      6 mdd.lustre-MDT0001.changelog_mask=+hsm
      7 Registered 2 changelog users: 'cl1 cl1'
      8 llite.lustre-fffff7ba8ec63000.xattr_cache=0
      9 1+0 records in
      10 1+0 records out
      11 1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.101602 s, 10.3 MB/s
      12 Test SOM for multi-client (0) writes
      13 No such rcmd module "(PATH=$PATH"
      14 pdsh@lustre-aio: Failed to register rcmd "(PATH=$PATH" for "/lib64/lustre/utils:/lib64/lustre/tests:/sbin:/usr/sbin; cd /lib64/lustre/tests; LUSTRE="/lib64/lustre"  mds1_FSTYPE=ldiskfs ost1_FSTYPE=ldiskfs VERBOSE=false FSTYPE=ldiskfs NETTYPE=tcp sh -c "sync ; sleep 5 ; sync")"
      15 Start to sync 3 records.
      16 lustre-MDT0000: clear the changelog for cl1 of all records
      17 lustre-MDT0000: Deregistered changelog user #1
      18
      19 ...the test gets stuck here...

      However, if the variable “CLIENTS=$(hostname)” is defined in cfg/local.sh, the test case passes.

       

      sanity test 807 source code

      27113 test_807() {
      27114     [ -n "$FILESET" ] && skip "Not functional for FILESET set"
      27115     [ $MDS1_VERSION -lt $(version_code 2.11.52) ] &&
      27116         skip "Need MDS version at least 2.11.52"
      27117
      27118     # Registration step
      27119     changelog_register || error "changelog_register failed"
      27120     local cl_user="${CL_USERS[$SINGLEMDS]%% *}"
      27121     changelog_users $SINGLEMDS | grep -q $cl_user ||
      27122         error "User $cl_user not found in changelog_users"
      27123
      27124     local save="$TMP/$TESTSUITE-$TESTNAME.parameters"
      27125     save_lustre_params client "llite.*.xattr_cache" > $save
      27126     lctl set_param llite.*.xattr_cache=0
      27127     stack_trap "restore_lustre_params < $save; rm -f $save" EXIT
      27128
      27129     rm -rf $DIR/$tdir || error "rm $tdir failed"
      27130     mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed"
      27131     touch $DIR/$tdir/trunc || error "touch $tdir/trunc failed"
      27132     $TRUNCATE $DIR/$tdir/trunc 1024 || error "truncate $tdir/trunc failed"
      27133     $TRUNCATE $DIR/$tdir/trunc 1048576 ||
      27134         error "truncate $tdir/trunc failed"
      27135
      27136     local bs=1048576
      27137     dd if=/dev/zero of=$DIR/$tdir/single_dd bs=$bs count=1 conv=fsync ||
      27138         error "write $tfile failed"
      27139
      27140     # multi-client wirtes
      27141     local num=$(get_node_count ${CLIENTS//,/ })
      27142     local offset=0
      27143     local i=0
      27144
      27145     echo "Test SOM for multi-client ($num) writes"
      27146     touch $DIR/$tfile || error "touch $tfile failed"
      27147     $TRUNCATE $DIR/$tfile 0
      27148     for client in ${CLIENTS//,/ }; do
      27149         do_node $client $MULTIOP $DIR/$tfile Oz${offset}w${bs}c &
      27150         local pids[$i]=$!
      27151         i=$((i + 1))
      27152         offset=$((offset + $bs))
      27153     done
      27154     for (( i=0; i < $num; i++ )); do
      27155         wait ${pids[$i]}
      27156     done
      27157
      27158     do_rpc_nodes "$CLIENTS" cancel_lru_locks osc
      27159     do_nodes "$CLIENTS" "sync ; sleep 5 ; sync"
      27160     $LSOM_SYNC -u $cl_user -m $FSNAME-MDT0000 $MOUNT
      27161     check_lsom_data $DIR/$tdir/trunc
      27162     check_lsom_data $DIR/$tdir/single_dd
      27163     check_lsom_data $DIR/$tfile
      27164
      27165     rm -rf $DIR/$tdir
      27166     # Deregistration step
      27167     changelog_deregister || error "changelog_deregister failed"
      27168 }
      27169 run_test 807 "verify LSOM syncing tool"
       

      Attachments

        Activity

          People

            xinliang Xinliang Liu
            xinliang Xinliang Liu
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: