fix script

This commit is contained in:
Meeeeee6623 2021-09-20 01:19:39 -04:00
parent 4cc3db0d8b
commit b0de218fcc
2 changed files with 14 additions and 6 deletions

View file

@ -5,7 +5,7 @@ for (( n = 0; i < 3; n++ )); do
mkdir /imagenet/TensorFlow/run_"${n}"/epoch_"${i}"/
echo "Created Folder epoch_${i}"
mpiexec --allow-run-as-root --bind-to socket -np 2 python3 main.py --arch=resnet50 --mode=train_and_evaluate \
--num_iter=i --batch_size=192 --warmup_steps=0 --lr_warmup_epochs=0 --model_dir=/imagenet/TensorFlow/model \
--num_iter="${i}" --batch_size=192 --warmup_steps=0 --lr_warmup_epochs=0 --model_dir=/imagenet/TensorFlow/model \
--data_dir=/imagenet/tfrecords --data_idx_dir=/imagenet/dali_idx \
--results_dir=/imagenet/TensorFlow/run_"${n}"/epoch_"${i}"/ \
--export_dir=/imagenet/TensorFlow/model --weight_init=fan_in --amp --log_filename run_"${n}"epoch_"${i}".json

View file

@ -1,14 +1,22 @@
#!/usr/bin/env bash
# nvidia-docker run --rm -it --ipc=host -v /work/chauhans/cifar100:/cifar100 rn50_tf1
# set classes to 100 in main.py before running
#
# Launches main.py (--arch=resnet50, --mode=train_and_evaluate) against the
# /cifar100 data for runs 1..5. Within each run, main.py is started once per
# value of i in 0..90 with --num_iter set to i; every launch gets its own
# results directory and JSON log, while the model/export directory is shared
# by all launches of the same run.
# NOTE(review): the first launch of every run passes --num_iter=0 — confirm
# main.py accepts that.
# A failing mkdir or mpiexec does not stop the loops; the next launch is
# still attempted.

base_dir=/cifar100/TensorFlow

# The outer condition must test n. Testing i (unset, i.e. 0, before the inner
# loop and 91 after it) would end the outer loop after a single run.
for (( n = 1; n < 6; n++ )); do
  run_dir="${base_dir}/run_${n}"
  for (( i = 0; i < 91; i++ )); do
    epoch_dir="${run_dir}/epoch_${i}/"
    # -p creates run_${n}/ on first use and tolerates an existing directory.
    mkdir -p "${epoch_dir}"
    echo "Created Folder epoch_${i}"
    mpiexec --allow-run-as-root --bind-to socket -np 2 python3 main.py --arch=resnet50 --mode=train_and_evaluate \
      --num_iter="${i}" --batch_size=192 --warmup_steps=0 --lr_warmup_epochs=0 --model_dir="${run_dir}/model" \
      --data_dir=/cifar100/tfrecords --results_dir="${epoch_dir}" \
      --export_dir="${run_dir}/model" --weight_init=fan_in --amp --log_filename run_"${n}"epoch_"${i}".json
    echo "Epoch ${i} done"
  done
  echo "Run ${n} done"
done
# Single-launch variant kept for reference (disabled):
# mpiexec --allow-run-as-root --bind-to socket -np 2 python3 main.py --arch=resnet50 --mode=train_and_evaluate \
# --num_iter=90 --batch_size=192 --warmup_steps=0 --lr_warmup_epochs=0 --model_dir=/cifar100/TensorFlow/model \
# --data_dir=/cifar100/tfrecords --results_dir=/cifar100/TensorFlow/test \
# --export_dir=/cifar100/TensorFlow/model --weight_init=fan_in --amp --log_filename test.json