diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 9dde5f7295..1864528393 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -570,6 +570,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_PLAIN_RECORDS		0x1319
 #define OBD_FAIL_CATALOG_FULL_CHECK	0x131a
 #define OBD_FAIL_CATLIST		0x131b
+#define OBD_FAIL_LLOG_PAUSE_AFTER_PAD	0x131c
 
 #define OBD_FAIL_LLITE			0x1400
 #define OBD_FAIL_LLITE_FAULT_TRUNC_RACE	0x1401
diff --git a/lustre/obdclass/llog_osd.c b/lustre/obdclass/llog_osd.c
index 6021396f50..bfd3389e04 100644
--- a/lustre/obdclass/llog_osd.c
+++ b/lustre/obdclass/llog_osd.c
@@ -389,6 +389,7 @@ static int llog_osd_write_rec(const struct lu_env *env,
 	__u32 chunk_size;
 	size_t left;
 	__u32 orig_last_idx;
+	bool pad = false; /* set once this call has written a pad record */
 	ENTRY;
 
 	llh = loghandle->lgh_hdr;
@@ -580,6 +581,7 @@ static int llog_osd_write_rec(const struct lu_env *env,
 			RETURN(rc);
 
 		loghandle->lgh_last_idx++; /* for pad rec */
+		pad = true;
 	}
 	/* if it's the last idx in log file, then return -ENOSPC
 	 * or wrap around if a catalog */
@@ -671,6 +673,9 @@ static int llog_osd_write_rec(const struct lu_env *env,
 		if (rc != 0)
 			GOTO(out_unlock, rc);
 	}
+	/* fault injection point, only reached when a pad record was added */
+	if (pad)
+		OBD_FAIL_TIMEOUT(OBD_FAIL_LLOG_PAUSE_AFTER_PAD, 1);
 
 out_unlock:
 	/* unlock here for remote object */
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index a13262b1f9..9f2961f2cc 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -26138,6 +26138,53 @@ test_902() {
 }
 run_test 902 "test short write doesn't hang lustre"
 
+# Background load for test_1000: create 5000 files under $DIR/$tdir.
+# It is started with '&' and killed by the test once the dumps are done.
+function t1000_load() {
+	local i
+
+	mkdir -p $DIR/$tdir
+	for ((i = 0; i < 5000; i++)); do
+		touch $DIR/$tdir/f$i
+	done
+}
+
+# Dump the changelog 100 times while t1000_load is creating files with
+# OBD_FAIL_LLOG_PAUSE_AFTER_PAD armed; every dump must succeed.
+test_1000() {
+	local cl_user
+	local pid
+	local i
+	local rc=0
+
+	changelog_register || error "changelog_register failed"
+	cl_user="${CL_USERS[$SINGLEMDS]%% *}"
+	changelog_users $SINGLEMDS | grep -q "$cl_user" ||
+		error "User $cl_user not found in changelog_users"
+	changelog_chmask "ALL"
+
+	#define OBD_FAIL_LLOG_PAUSE_AFTER_PAD 0x131c
+	# NOTE(review): this arms fail_loc on the local node only; confirm
+	# that is sufficient when the MDS runs on a different node.
+	$LCTL set_param fail_loc=0x131c
+	t1000_load &
+	pid=$!
+
+	for ((i = 0; i < 100; i++)); do
+		changelog_dump >/dev/null || { rc=$?; break; }
+	done
+
+	# Always stop the load and disarm fail_loc before reporting, so that
+	# a failure here cannot leak into the following tests.
+	kill $pid
+	wait $pid
+	$LCTL set_param fail_loc=0
+
+	changelog_deregister || error "changelog_deregister failed"
+	(( rc == 0 )) || error "can't read changelog"
+}
+run_test 1000 "changelog dump while llog write pauses after pad record"
+
 complete $SECONDS
 [ -f $EXT2_DEV ] && rm $EXT2_DEV || true
 check_and_cleanup_lustre