From 9061083098da527671529366315f5bc0e16493c4 Mon Sep 17 00:00:00 2001
From: kkudrynski
Date: Wed, 21 Oct 2020 17:19:07 +0200
Subject: [PATCH] [ConvNets/Pyt] Pretrained weights usage guidelines

---
 PyTorch/Classification/ConvNets/classify.py   |  9 ++--
 PyTorch/Classification/ConvNets/main.py       |  8 +--
 .../ConvNets/resnet50v1.5/README.md           | 50 +++++++++++++++---
 .../ConvNets/resnext101-32x4d/README.md       | 52 ++++++++++++++++---
 .../ConvNets/se-resnext101-32x4d/README.md    | 50 +++++++++++++++---
 5 files changed, 141 insertions(+), 28 deletions(-)

diff --git a/PyTorch/Classification/ConvNets/classify.py b/PyTorch/Classification/ConvNets/classify.py
index ca2c83c1..597879d6 100644
--- a/PyTorch/Classification/ConvNets/classify.py
+++ b/PyTorch/Classification/ConvNets/classify.py
@@ -63,17 +63,16 @@ def main(args):
 
     if args.weights is not None:
         weights = torch.load(args.weights)
-        #Temporary fix to allow NGC checkpoint loading
-        weights = {k.replace("module.", ""): v for k, v in weights.items()}
-
+        weights = {
+            k.replace("module.", ""): v for k, v in weights.items()
+        }
         model.load_state_dict(weights)
 
     model = model.cuda()
 
     if args.precision in ["AMP", "FP16"]:
-        model = model.half()
-
+        model = model.half()
     model.eval()

diff --git a/PyTorch/Classification/ConvNets/main.py b/PyTorch/Classification/ConvNets/main.py
index a7946649..6391a1d0 100644
--- a/PyTorch/Classification/ConvNets/main.py
+++ b/PyTorch/Classification/ConvNets/main.py
@@ -363,10 +363,10 @@ def main(args):
             )
         )
         pretrained_weights = torch.load(args.pretrained_weights)
-
-        #Temporary fix to allow NGC checkpoint loading
-
-        pretrained_weights = {k.replace("module.", ""): v for k, v in pretrained_weights.items()}
+        # Temporary fix to allow NGC checkpoint loading
+        pretrained_weights = {
+            k.replace("module.", ""): v for k, v in pretrained_weights.items()
+        }
     else:
         print("=> no pretrained weights found at '{}'".format(args.resume))

diff --git a/PyTorch/Classification/ConvNets/resnet50v1.5/README.md b/PyTorch/Classification/ConvNets/resnet50v1.5/README.md
index 43a5412c..7a22d5a9 100644
--- a/PyTorch/Classification/ConvNets/resnet50v1.5/README.md
+++ b/PyTorch/Classification/ConvNets/resnet50v1.5/README.md
@@ -281,17 +281,21 @@ Example:
 
 ### 6. Start inference
 
-To run inference on ImageNet on a checkpointed model, run:
+You can download pretrained weights from NGC:
 
-`python ./main.py --arch resnet50 --evaluate --epochs 1 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnet50_pyt_amp/versions/20.06.0/zip -O resnet50_pyt_amp_20.06.0.zip
 
-To run inference on JPEG image, you have to first extract the model weights from checkpoint:
+unzip resnet50_pyt_amp_20.06.0.zip
+```
 
-`python checkpoint2model.py --checkpoint-path <path to checkpoint> --weight-path <path where weights will be stored>`
+To run inference on ImageNet, run:
 
-Then run classification script:
+`python ./main.py --arch resnet50 --evaluate --epochs 1 --pretrained-weights nvidia_resnet50_200821.pth.tar -b <batch size> <path to imagenet>`
 
-`python classify.py --arch resnet50 -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnet50 -c fanin --weights nvidia_resnet50_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Advanced
 
@@ -445,6 +449,19 @@ Metrics gathered through training:
  - `train.data_time` - time spent on waiting on data
  - `train.compute_time` - time spent in forward/backward pass
 
+To restart training from a checkpoint, use the `--resume` option.
+
+To start training from pretrained weights (e.g. downloaded from NGC), use the `--pretrained-weights` option.
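+
+For example, to resume an interrupted run from its checkpoint (an illustrative invocation; paths are placeholders and any other training flags are omitted):
+
+`python ./main.py --arch resnet50 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+
+and to start a fresh run initialized only from the pretrained weights:
+
+`python ./main.py --arch resnet50 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`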
+
+The difference between the two is that pretrained weights contain only the model weights,
+while checkpoints also contain the optimizer state, LR scheduler state, and RNG state.
+
+Checkpoints are suitable for splitting the training into parts, for example to divide
+a long training job into shorter stages or to restart training after an infrastructure failure.
+
+Pretrained weights can be used as a base for fine-tuning the model on a different dataset
+or as a backbone for detection models.
+
 ### Inference process
 
 Validation is done every epoch, and can be also run separately on a checkpointed model.
 
@@ -470,6 +487,27 @@ Then run classification script:
 
 `python classify.py --arch resnet50 -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
 
+You can also run ImageNet validation on pretrained weights:
+
+`python ./main.py --arch resnet50 --evaluate --epochs 1 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`
+
+#### NGC pretrained weights
+
+Pretrained weights can be downloaded from NGC:
+
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnet50_pyt_amp/versions/20.06.0/zip -O resnet50_pyt_amp_20.06.0.zip
+
+unzip resnet50_pyt_amp_20.06.0.zip
+```
+
+To run inference on ImageNet, run:
+
+`python ./main.py --arch resnet50 --evaluate --epochs 1 --pretrained-weights nvidia_resnet50_200821.pth.tar -b <batch size> <path to imagenet>`
+
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnet50 -c fanin --weights nvidia_resnet50_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Performance

diff --git a/PyTorch/Classification/ConvNets/resnext101-32x4d/README.md b/PyTorch/Classification/ConvNets/resnext101-32x4d/README.md
index dc7e23fb..9c9684e3 100644
--- a/PyTorch/Classification/ConvNets/resnext101-32x4d/README.md
+++ b/PyTorch/Classification/ConvNets/resnext101-32x4d/README.md
@@ -266,17 +266,21 @@ Example:
 
 ### 6. Start inference
 
-To run inference on ImageNet on a checkpointed model, run:
+You can download pretrained weights from NGC:
 
-`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnext101_32x4d_pyt_amp/versions/20.06.0/zip -O resnext101_32x4d_pyt_amp_20.06.0.zip
 
-To run inference on JPEG image, you have to first extract the model weights from checkpoint:
+unzip resnext101_32x4d_pyt_amp_20.06.0.zip
+```
 
-`python checkpoint2model.py --checkpoint-path <path to checkpoint> --weight-path <path where weights will be stored>`
+To run inference on ImageNet, run:
 
-Then run classification script:
+`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
 
-`python classify.py --arch resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnext101-32x4d -c fanin --weights nvidia_resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Advanced
 
@@ -431,6 +435,19 @@ Metrics gathered through training:
  - `train.data_time` - time spent on waiting on data
  - `train.compute_time` - time spent in forward/backward pass
 
+To restart training from a checkpoint, use the `--resume` option.
+
+To start training from pretrained weights (e.g. downloaded from NGC), use the `--pretrained-weights` option.
+
+The difference between the two is that pretrained weights contain only the model weights,
+while checkpoints also contain the optimizer state, LR scheduler state, and RNG state.
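+
+Conceptually, a checkpoint can be reduced to a plain weights file with a few lines of PyTorch.
+The sketch below is illustrative only: the `state_dict` key is an assumption, see
+`checkpoint2model.py` for the exact checkpoint layout used by this repository.
+
+```python
+import torch
+
+# A checkpoint bundles the model weights with training state (optimizer, LR scheduler, RNG).
+checkpoint = torch.load("<path to checkpoint>", map_location="cpu")
+
+# Assumed layout: model weights under a "state_dict" key, possibly carrying the
+# DistributedDataParallel "module." prefix that main.py/classify.py strip before loading.
+state_dict = checkpoint["state_dict"]
+weights = {k.replace("module.", ""): v for k, v in state_dict.items()}
+
+# A pretrained-weights file is just this flat state dict.
+torch.save(weights, "<path where weights will be stored>")
+```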
+
+Checkpoints are suitable for splitting the training into parts, for example to divide
+a long training job into shorter stages or to restart training after an infrastructure failure.
+
+Pretrained weights can be used as a base for fine-tuning the model on a different dataset
+or as a backbone for detection models.
+
 ### Inference process
 
 Validation is done every epoch, and can be also run separately on a checkpointed model.
 
@@ -454,8 +471,29 @@ To run inference on JPEG image, you have to first extract the model weights from
 
 Then run classification script:
 
-`python classify.py --arch resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|
+`python classify.py --arch resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
 
+You can also run ImageNet validation on pretrained weights:
+
+`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`
+
+#### NGC pretrained weights
+
+Pretrained weights can be downloaded from NGC:
+
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnext101_32x4d_pyt_amp/versions/20.06.0/zip -O resnext101_32x4d_pyt_amp_20.06.0.zip
+
+unzip resnext101_32x4d_pyt_amp_20.06.0.zip
+```
+
+To run inference on ImageNet, run:
+
+`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
+
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnext101-32x4d -c fanin --weights nvidia_resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Performance

diff --git a/PyTorch/Classification/ConvNets/se-resnext101-32x4d/README.md b/PyTorch/Classification/ConvNets/se-resnext101-32x4d/README.md
index c994e600..ff74a92d 100644
--- a/PyTorch/Classification/ConvNets/se-resnext101-32x4d/README.md
+++ b/PyTorch/Classification/ConvNets/se-resnext101-32x4d/README.md
@@ -267,17 +267,21 @@ Example:
 
 ### 6. Start inference
 
-To run inference on ImageNet on a checkpointed model, run:
+You can download pretrained weights from NGC:
 
-`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/seresnext101_32x4d_pyt_amp/versions/20.06.0/zip -O seresnext101_32x4d_pyt_amp_20.06.0.zip
 
-To run inference on JPEG image, you have to first extract the model weights from checkpoint:
+unzip seresnext101_32x4d_pyt_amp_20.06.0.zip
+```
 
-`python checkpoint2model.py --checkpoint-path <path to checkpoint> --weight-path <path where weights will be stored>`
+To run inference on ImageNet, run:
 
-Then run classification script:
+`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_se-resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
 
-`python classify.py --arch se-resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch se-resnext101-32x4d -c fanin --weights nvidia_se-resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Advanced
 
@@ -432,6 +436,19 @@ Metrics gathered through training:
  - `train.data_time` - time spent on waiting on data
  - `train.compute_time` - time spent in forward/backward pass
 
+To restart training from a checkpoint, use the `--resume` option.
+
+To start training from pretrained weights (e.g. downloaded from NGC), use the `--pretrained-weights` option.
+
+The difference between the two is that pretrained weights contain only the model weights,
+while checkpoints also contain the optimizer state, LR scheduler state, and RNG state.
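+
+A quick way to sanity-check a downloaded pretrained-weights file is to load it and inspect its keys.
+This is only a sketch; it assumes the file is a flat state dict, possibly carrying the
+DistributedDataParallel "module." prefix that the training and inference scripts strip before loading.
+
+```python
+import torch
+
+# Load on the CPU; no model or GPU is needed just to inspect the file.
+weights = torch.load("nvidia_se-resnext101-32x4d_200821.pth.tar", map_location="cpu")
+
+print(len(weights), "tensors")   # number of parameter/buffer tensors in the state dict
+print(next(iter(weights)))       # first key, possibly prefixed with "module."
+```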
+
+Checkpoints are suitable for splitting the training into parts, for example to divide
+a long training job into shorter stages or to restart training after an infrastructure failure.
+
+Pretrained weights can be used as a base for fine-tuning the model on a different dataset
+or as a backbone for detection models.
+
 ### Inference process
 
 Validation is done every epoch, and can be also run separately on a checkpointed model.
 
@@ -457,6 +474,27 @@ Then run classification script:
 
 `python classify.py --arch se-resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
 
+You can also run ImageNet validation on pretrained weights:
+
+`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`
+
+#### NGC pretrained weights
+
+Pretrained weights can be downloaded from NGC:
+
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/seresnext101_32x4d_pyt_amp/versions/20.06.0/zip -O seresnext101_32x4d_pyt_amp_20.06.0.zip
+
+unzip seresnext101_32x4d_pyt_amp_20.06.0.zip
+```
+
+To run inference on ImageNet, run:
+
+`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_se-resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
+
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch se-resnext101-32x4d -c fanin --weights nvidia_se-resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Performance