From 9061083098da527671529366315f5bc0e16493c4 Mon Sep 17 00:00:00 2001
From: kkudrynski
Date: Wed, 21 Oct 2020 17:19:07 +0200
Subject: [PATCH] [ConvNets/Pyt] Pretrained weights usage guidelines

---
 PyTorch/Classification/ConvNets/classify.py   |  9 ++--
 PyTorch/Classification/ConvNets/main.py       |  8 +--
 .../ConvNets/resnet50v1.5/README.md           | 50 +++++++++++++++---
 .../ConvNets/resnext101-32x4d/README.md       | 52 ++++++++++++++++---
 .../ConvNets/se-resnext101-32x4d/README.md    | 50 +++++++++++++++---
 5 files changed, 141 insertions(+), 28 deletions(-)

diff --git a/PyTorch/Classification/ConvNets/classify.py b/PyTorch/Classification/ConvNets/classify.py
index ca2c83c1..597879d6 100644
--- a/PyTorch/Classification/ConvNets/classify.py
+++ b/PyTorch/Classification/ConvNets/classify.py
@@ -63,17 +63,16 @@ def main(args):
 
     if args.weights is not None:
         weights = torch.load(args.weights)
-        #Temporary fix to allow NGC checkpoint loading
-        weights = {k.replace("module.", ""): v for k, v in weights.items()}
-
+        weights = {
+            k.replace("module.", ""): v for k, v in weights.items()
+        }
         model.load_state_dict(weights)
 
     model = model.cuda()
 
     if args.precision in ["AMP", "FP16"]:
-        model = model.half()
-
+        model = model.half()
     model.eval()

diff --git a/PyTorch/Classification/ConvNets/main.py b/PyTorch/Classification/ConvNets/main.py
index a7946649..6391a1d0 100644
--- a/PyTorch/Classification/ConvNets/main.py
+++ b/PyTorch/Classification/ConvNets/main.py
@@ -363,10 +363,10 @@ def main(args):
             )
         )
         pretrained_weights = torch.load(args.pretrained_weights)
-
-        #Temporary fix to allow NGC checkpoint loading
-
-        pretrained_weights = {k.replace("module.", ""): v for k, v in pretrained_weights.items()}
+        # Temporary fix to allow NGC checkpoint loading
+        pretrained_weights = {
+            k.replace("module.", ""): v for k, v in pretrained_weights.items()
+        }
     else:
         print("=> no pretrained weights found at '{}'".format(args.resume))

diff --git a/PyTorch/Classification/ConvNets/resnet50v1.5/README.md b/PyTorch/Classification/ConvNets/resnet50v1.5/README.md
index 43a5412c..7a22d5a9 100644
--- a/PyTorch/Classification/ConvNets/resnet50v1.5/README.md
+++ b/PyTorch/Classification/ConvNets/resnet50v1.5/README.md
@@ -281,17 +281,21 @@ Example:
 
 ### 6. Start inference
 
-To run inference on ImageNet on a checkpointed model, run:
+You can download pretrained weights from NGC:
 
-`python ./main.py --arch resnet50 --evaluate --epochs 1 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnet50_pyt_amp/versions/20.06.0/zip -O resnet50_pyt_amp_20.06.0.zip
 
-To run inference on JPEG image, you have to first extract the model weights from checkpoint:
+unzip resnet50_pyt_amp_20.06.0.zip
+```
 
-`python checkpoint2model.py --checkpoint-path <path to checkpoint> --weight-path <path where weights will be stored>`
+To run inference on ImageNet, run:
 
-Then run classification script:
+`python ./main.py --arch resnet50 --evaluate --epochs 1 --pretrained-weights nvidia_resnet50_200821.pth.tar -b <batch size> <path to imagenet>`
 
-`python classify.py --arch resnet50 -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnet50 -c fanin --weights nvidia_resnet50_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Advanced
 
@@ -445,6 +449,19 @@ Metrics gathered through training:
  - `train.data_time` - time spent on waiting on data
  - `train.compute_time` - time spent in forward/backward pass
 
+To restart training from a checkpoint, use the `--resume` option.
+
+To start training from pretrained weights (e.g. downloaded from NGC), use the `--pretrained-weights` option.
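+
+For example, to resume an interrupted run from its checkpoint (an illustrative invocation; paths are placeholders and any other training flags are omitted):
+
+`python ./main.py --arch resnet50 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+
+and to start a fresh run initialized only from the pretrained weights:
+
+`python ./main.py --arch resnet50 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`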
+
+The difference between the two is that pretrained weights contain only the model weights,
+while checkpoints also contain the optimizer state, LR scheduler state, and RNG state.
+
+Checkpoints are suitable for splitting the training into parts, for example to divide
+a long training job into shorter stages or to restart training after an infrastructure failure.
+
+Pretrained weights can be used as a base for fine-tuning the model on a different dataset
+or as a backbone for detection models.
+
 ### Inference process
 
 Validation is done every epoch, and can be also run separately on a checkpointed model.
 
@@ -470,6 +487,27 @@ Then run classification script:
 
 `python classify.py --arch resnet50 -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
 
+You can also run ImageNet validation on pretrained weights:
+
+`python ./main.py --arch resnet50 --evaluate --epochs 1 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`
+
+#### NGC pretrained weights
+
+Pretrained weights can be downloaded from NGC:
+
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnet50_pyt_amp/versions/20.06.0/zip -O resnet50_pyt_amp_20.06.0.zip
+
+unzip resnet50_pyt_amp_20.06.0.zip
+```
+
+To run inference on ImageNet, run:
+
+`python ./main.py --arch resnet50 --evaluate --epochs 1 --pretrained-weights nvidia_resnet50_200821.pth.tar -b <batch size> <path to imagenet>`
+
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnet50 -c fanin --weights nvidia_resnet50_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Performance

diff --git a/PyTorch/Classification/ConvNets/resnext101-32x4d/README.md b/PyTorch/Classification/ConvNets/resnext101-32x4d/README.md
index dc7e23fb..9c9684e3 100644
--- a/PyTorch/Classification/ConvNets/resnext101-32x4d/README.md
+++ b/PyTorch/Classification/ConvNets/resnext101-32x4d/README.md
@@ -266,17 +266,21 @@ Example:
 
 ### 6. Start inference
 
-To run inference on ImageNet on a checkpointed model, run:
+You can download pretrained weights from NGC:
 
-`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnext101_32x4d_pyt_amp/versions/20.06.0/zip -O resnext101_32x4d_pyt_amp_20.06.0.zip
 
-To run inference on JPEG image, you have to first extract the model weights from checkpoint:
+unzip resnext101_32x4d_pyt_amp_20.06.0.zip
+```
 
-`python checkpoint2model.py --checkpoint-path <path to checkpoint> --weight-path <path where weights will be stored>`
+To run inference on ImageNet, run:
 
-Then run classification script:
+`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
 
-`python classify.py --arch resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnext101-32x4d -c fanin --weights nvidia_resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Advanced
 
@@ -431,6 +435,19 @@ Metrics gathered through training:
  - `train.data_time` - time spent on waiting on data
  - `train.compute_time` - time spent in forward/backward pass
 
+To restart training from a checkpoint, use the `--resume` option.
+
+To start training from pretrained weights (e.g. downloaded from NGC), use the `--pretrained-weights` option.
+
+The difference between the two is that pretrained weights contain only the model weights,
+while checkpoints also contain the optimizer state, LR scheduler state, and RNG state.
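+
+Conceptually, a checkpoint can be reduced to a plain weights file with a few lines of PyTorch.
+The sketch below is illustrative only: the `state_dict` key is an assumption, see
+`checkpoint2model.py` for the exact checkpoint layout used by this repository.
+
+```python
+import torch
+
+# A checkpoint bundles the model weights with training state (optimizer, LR scheduler, RNG).
+checkpoint = torch.load("<path to checkpoint>", map_location="cpu")
+
+# Assumed layout: model weights under a "state_dict" key, possibly carrying the
+# DistributedDataParallel "module." prefix that main.py/classify.py strip before loading.
+state_dict = checkpoint["state_dict"]
+weights = {k.replace("module.", ""): v for k, v in state_dict.items()}
+
+# A pretrained-weights file is just this flat state dict.
+torch.save(weights, "<path where weights will be stored>")
+```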
+
+Checkpoints are suitable for splitting the training into parts, for example to divide
+a long training job into shorter stages or to restart training after an infrastructure failure.
+
+Pretrained weights can be used as a base for fine-tuning the model on a different dataset
+or as a backbone for detection models.
+
 ### Inference process
 
 Validation is done every epoch, and can be also run separately on a checkpointed model.
 
@@ -454,8 +471,29 @@ To run inference on JPEG image, you have to first extract the model weights from
 
 Then run classification script:
 
-`python classify.py --arch resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|
+`python classify.py --arch resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
 
+You can also run ImageNet validation on pretrained weights:
+
+`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`
+
+#### NGC pretrained weights
+
+Pretrained weights can be downloaded from NGC:
+
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/resnext101_32x4d_pyt_amp/versions/20.06.0/zip -O resnext101_32x4d_pyt_amp_20.06.0.zip
+
+unzip resnext101_32x4d_pyt_amp_20.06.0.zip
+```
+
+To run inference on ImageNet, run:
+
+`python ./main.py --arch resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
+
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch resnext101-32x4d -c fanin --weights nvidia_resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Performance

diff --git a/PyTorch/Classification/ConvNets/se-resnext101-32x4d/README.md b/PyTorch/Classification/ConvNets/se-resnext101-32x4d/README.md
index c994e600..ff74a92d 100644
--- a/PyTorch/Classification/ConvNets/se-resnext101-32x4d/README.md
+++ b/PyTorch/Classification/ConvNets/se-resnext101-32x4d/README.md
@@ -267,17 +267,21 @@ Example:
 
 ### 6. Start inference
 
-To run inference on ImageNet on a checkpointed model, run:
+You can download pretrained weights from NGC:
 
-`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --resume <path to checkpoint> -b <batch size> <path to imagenet>`
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/seresnext101_32x4d_pyt_amp/versions/20.06.0/zip -O seresnext101_32x4d_pyt_amp_20.06.0.zip
 
-To run inference on JPEG image, you have to first extract the model weights from checkpoint:
+unzip seresnext101_32x4d_pyt_amp_20.06.0.zip
+```
 
-`python checkpoint2model.py --checkpoint-path <path to checkpoint> --weight-path <path where weights will be stored>`
+To run inference on ImageNet, run:
 
-Then run classification script:
+`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_se-resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
 
-`python classify.py --arch se-resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch se-resnext101-32x4d -c fanin --weights nvidia_se-resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Advanced
 
@@ -432,6 +436,19 @@ Metrics gathered through training:
  - `train.data_time` - time spent on waiting on data
  - `train.compute_time` - time spent in forward/backward pass
 
+To restart training from a checkpoint, use the `--resume` option.
+
+To start training from pretrained weights (e.g. downloaded from NGC), use the `--pretrained-weights` option.
+
+The difference between the two is that pretrained weights contain only the model weights,
+while checkpoints also contain the optimizer state, LR scheduler state, and RNG state.
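+
+A quick way to sanity-check a downloaded pretrained-weights file is to load it and inspect its keys.
+This is only a sketch; it assumes the file is a flat state dict, possibly carrying the
+DistributedDataParallel "module." prefix that the training and inference scripts strip before loading.
+
+```python
+import torch
+
+# Load on the CPU; no model or GPU is needed just to inspect the file.
+weights = torch.load("nvidia_se-resnext101-32x4d_200821.pth.tar", map_location="cpu")
+
+print(len(weights), "tensors")   # number of parameter/buffer tensors in the state dict
+print(next(iter(weights)))       # first key, possibly prefixed with "module."
+```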
+
+Checkpoints are suitable for splitting the training into parts, for example to divide
+a long training job into shorter stages or to restart training after an infrastructure failure.
+
+Pretrained weights can be used as a base for fine-tuning the model on a different dataset
+or as a backbone for detection models.
+
 ### Inference process
 
 Validation is done every epoch, and can be also run separately on a checkpointed model.
 
@@ -457,6 +474,27 @@ Then run classification script:
 
 `python classify.py --arch se-resnext101-32x4d -c fanin --weights <path to weights> --precision AMP|FP32 --image <path to JPEG image>`
 
+You can also run ImageNet validation on pretrained weights:
+
+`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --pretrained-weights <path to pretrained weights> -b <batch size> <path to imagenet>`
+
+#### NGC pretrained weights
+
+Pretrained weights can be downloaded from NGC:
+
+```bash
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/seresnext101_32x4d_pyt_amp/versions/20.06.0/zip -O seresnext101_32x4d_pyt_amp_20.06.0.zip
+
+unzip seresnext101_32x4d_pyt_amp_20.06.0.zip
+```
+
+To run inference on ImageNet, run:
+
+`python ./main.py --arch se-resnext101-32x4d --evaluate --epochs 1 --pretrained-weights nvidia_se-resnext101-32x4d_200821.pth.tar -b <batch size> <path to imagenet>`
+
+To run inference on a JPEG image using pretrained weights:
+
+`python classify.py --arch se-resnext101-32x4d -c fanin --weights nvidia_se-resnext101-32x4d_200821.pth.tar --precision AMP|FP32 --image <path to JPEG image>`
 
 ## Performance