Update checkpoint MaskRCNN

Signed-off-by: Pablo Ribalta <pribalta@nvidia.com>
This commit is contained in:
Pablo Ribalta 2020-07-23 15:00:13 +02:00
parent 9ba22d1f29
commit f4625a2935
10 changed files with 19 additions and 25 deletions

View file

@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#mkdir -p weights/
mkdir -p /model
cd /model
# DOWNLOAD CHECKPOINTS
@@ -29,7 +29,7 @@ wget -N ${BASE_URL}/variables/variables.data-00000-of-00001 -P ${DEST_DIR}/varia
wget -N ${BASE_URL}/variables/variables.index -P ${DEST_DIR}/variables
## ====================== resnet-nhwc-2018-02-07 ====================== ##
BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-02-07"
BASE_URL="https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07"
DEST_DIR="resnet/resnet-nhwc-2018-02-07"
wget -N ${BASE_URL}/checkpoint -P ${DEST_DIR}
@@ -38,12 +38,12 @@ wget -N ${BASE_URL}/model.ckpt-112603.index -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.meta -P ${DEST_DIR}
## ====================== resnet-nhwc-2018-10-14 ====================== ##
BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-10-14"
DEST_DIR="resnet/resnet-nhwc-2018-10-14"
wget -N ${BASE_URL}/model.ckpt-112602.data-00000-of-00001 -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112602.index -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112602.meta -P ${DEST_DIR}
#BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-10-14"
#DEST_DIR="resnet/resnet-nhwc-2018-10-14"
#
#wget -N ${BASE_URL}/model.ckpt-112602.data-00000-of-00001 -P ${DEST_DIR}
#wget -N ${BASE_URL}/model.ckpt-112602.index -P ${DEST_DIR}
#wget -N ${BASE_URL}/model.ckpt-112602.meta -P ${DEST_DIR}
# VERIFY CHECKPOINTS
echo "Verifying and Processing Checkpoints..."
@@ -64,8 +64,8 @@ python inspect_checkpoint.py --file_name=mask-rcnn/1555659850/ckpt/model.ckpt \
python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-02-07/model.ckpt-112603 \
> resnet/resnet-nhwc-2018-02-07/tensors_and_shape.txt
python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-10-14/model.ckpt-112602 \
> resnet/resnet-nhwc-2018-10-14/tensors_and_shape.txt
#python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-10-14/model.ckpt-112602 \
# > resnet/resnet-nhwc-2018-10-14/tensors_and_shape.txt
python inspect_checkpoint.py --file_name=resnet/extracted_from_maskrcnn/resnet50.ckpt \
> resnet/extracted_from_maskrcnn/tensors_and_shape.txt

View file

@@ -36,7 +36,7 @@ def main():
f'python {main_path}'
f' --mode eval'
f' --model_dir "{flags.model_dir}"'
f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-10-14/model.ckpt-112602")}"'
f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")}"'
f' --validation_file_pattern "{os.path.join(flags.data_dir, "val*.tfrecord")}"'
f' --val_json_file "{os.path.join(flags.data_dir, "annotations/instances_val2017.json")}"'
f' --num_steps_per_eval 200'

View file

@@ -38,7 +38,7 @@ def main():
f'python {main_path}'
f' --mode train'
f' --model_dir "{flags.model_dir}"'
f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-10-14/model.ckpt-112602")}"'
f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")}"'
f' --training_file_pattern "{os.path.join(flags.data_dir, "train*.tfrecord")}"'
f' --init_learning_rate 0.04'
f' --total_steps 200'

View file

@@ -22,7 +22,7 @@ export CUDA_VISIBLE_DEVICES=0
python ${BASEDIR}/../mask_rcnn_main.py \
--mode="train_and_eval" \
--checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
--checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
--eval_samples=5000 \
--init_learning_rate=0.005 \
--learning_rate_steps="240000,320000" \

View file

@@ -32,7 +32,7 @@ mpirun \
--allow-run-as-root \
python ${BASEDIR}/../mask_rcnn_main.py \
--mode="train_and_eval" \
--checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
--checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
--eval_samples=5000 \
--init_learning_rate=0.02 \
--learning_rate_steps="60000,80000" \

View file

@@ -30,7 +30,7 @@ mpirun \
--allow-run-as-root \
python ${BASEDIR}/../mask_rcnn_main.py \
--mode="train_and_eval" \
--checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
--checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
--eval_samples=5000 \
--init_learning_rate=0.04 \
--learning_rate_steps="30000,40000" \

View file

@@ -22,7 +22,7 @@ export CUDA_VISIBLE_DEVICES=0
python ${BASEDIR}/../mask_rcnn_main.py \
--mode="train_and_eval" \
--checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
--checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
--eval_samples=5000 \
--init_learning_rate=0.005 \
--learning_rate_steps="240000,320000" \

View file

@@ -32,7 +32,7 @@ mpirun \
--allow-run-as-root \
python ${BASEDIR}/../mask_rcnn_main.py \
--mode="train_and_eval" \
--checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
--checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
--eval_samples=5000 \
--init_learning_rate=0.02 \
--learning_rate_steps="60000,80000" \

View file

@@ -30,7 +30,7 @@ mpirun \
--allow-run-as-root \
python ${BASEDIR}/../mask_rcnn_main.py \
--mode="train_and_eval" \
--checkpoint="/model/resnet/resnet-nhwc-2018-10-14/model.ckpt-112602" \
--checkpoint="/model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603" \
--eval_samples=5000 \
--init_learning_rate=0.04 \
--learning_rate_steps="30000,40000" \

View file

@@ -20,7 +20,6 @@ import sys
import getopt
import logging
import tensorflow as tf
from distutils.version import LooseVersion
"""
python weights/extract_RN50_weights.py \
@@ -45,12 +44,7 @@ def rename(checkpoint_dir, save_to, dry_run, verbose):
total_vars_loaded = 0
if LooseVersion(tf.__version__) < LooseVersion("2.0.0"):
file_list = tf.contrib.framework.list_variables(checkpoint_dir)
else:
file_list = tf.train.list_variables(checkpoint_dir)
for var_name, _ in file_list:
for var_name, _ in tf.train.list_variables(checkpoint_dir):
if "resnet50" in var_name:
# Load the variable