Details
-
Bug
-
Resolution: Unresolved
-
Minor
-
None
-
None
-
3
-
9223372036854775807
Description
# Repeatedly run dd (copied into Lustre so it executes from the filesystem
# under test) while clearing the MDT LDLM lock LRU mid-write, to exercise
# lock cancellation against an in-use executable.
# Globals: DIR (Lustre mountpoint), tfile (per-test filename), LCTL,
#          error() from test-framework.sh
test_30d()
{
	cp $(which dd) $DIR || error "failed to copy dd to $DIR/dd"

	for i in {1..10}; do
		# Write 128 x 1M blocks in the background...
		$DIR/dd bs=1M count=128 if=/dev/zero of=$DIR/$tfile &
		local PID=$!
		sleep 1
		# ...then drop cached MDT locks while dd is still running.
		$LCTL set_param ldlm.namespaces.MDT.lru_size=clear
		wait $PID || error "executing dd from Lustre failed"
		rm -f $DIR/$tfile
	done

	rm -f $DIR/dd
}
So that's 128 blocks of 1M size for each iteration. The test logs show an out-of-space error in the 5th iteration:
ldlm.namespaces.lustre-MDT0000-lwp-MDT0000.lru_size=clear
ldlm.namespaces.lustre-MDT0000-mdc-ffff88847bdc0800.lru_size=clear
ldlm.namespaces.lustre-MDT0001-mdc-ffff88847bdc0800.lru_size=clear
ldlm.namespaces.lustre-MDT0001-osp-MDT0000.lru_size=clear
ldlm.namespaces.lustre-OST0000-osc-MDT0000.lru_size=clear
ldlm.namespaces.lustre-OST0001-osc-MDT0000.lru_size=clear
ldlm.namespaces.lustre-OST0002-osc-MDT0000.lru_size=clear
ldlm.namespaces.lustre-OST0003-osc-MDT0000.lru_size=clear
ldlm.namespaces.lustre-OST0004-osc-MDT0000.lru_size=clear
ldlm.namespaces.lustre-OST0005-osc-MDT0000.lru_size=clear
ldlm.namespaces.lustre-OST0006-osc-MDT0000.lru_size=clear
ldlm.namespaces.lustre-OST0007-osc-MDT0000.lru_size=clear
ldlm.namespaces.mdt-lustre-MDT0000_UUID.lru_size=clear
/mnt/lustre/dd: error writing '/mnt/lustre/f30d.sanity': No space left on device
67+0 records in
66+0 records out
69206016 bytes (69 MB, 66 MiB) copied, 1.38986 s, 49.8 MB/s
sanity test_30d: @@@@@@ FAIL: executing dd from Lustre failed
Trace dump:
= /usr/lib/lustre/tests/test-framework.sh:6273:error()
= /usr/lib/lustre/tests/sanity.sh:3144:test_30d()
= /usr/lib/lustre/tests/test-framework.sh:6576:run_one()
= /usr/lib/lustre/tests/test-framework.sh:6623:run_one_logged()
= /usr/lib/lustre/tests/test-framework.sh:6450:run_test()
= /usr/lib/lustre/tests/sanity.sh:3150:main()
This is the size of the file system AFTER the test suite:
df /mnt/lustre
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sdb1 32711388 6270568 25058032 21% /
Depending on the environment in question, timing for this test may vary, which may cause us to hit an out-of-space condition because only a 400M device is provisioned. Simply bumping this to 1G provides sufficient room, so that this rare bug no longer recurs.
Patch to be sent shortly.