[ConvNets/TF1] Added Triton for ResNet
|
@ -32,7 +32,7 @@ allow_multiline_lambdas = True
|
|||
# # <------ this blank line
|
||||
# def method():
|
||||
# pass
|
||||
blank_line_before_nested_class_or_def = True
|
||||
blank_line_before_nested_class_or_def = False
|
||||
|
||||
# Insert a blank line before a module docstring.
|
||||
blank_line_before_module_docstring = True
|
||||
|
@ -83,7 +83,7 @@ continuation_indent_width = 4
|
|||
# start_ts=now()-timedelta(days=3),
|
||||
# end_ts=now(),
|
||||
# ) # <--- this bracket is dedented and on a separate line
|
||||
dedent_closing_brackets = True
|
||||
dedent_closing_brackets = False
|
||||
|
||||
# Disable the heuristic which places each list element on a separate line if the list is comma-terminated.
|
||||
disable_ending_comma_heuristic = false
|
||||
|
|
|
@ -1,8 +1,30 @@
|
|||
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.06-tf1-py3
|
||||
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.12-tf1-py3
|
||||
ARG TRITON_CLIENT_IMAGE_NAME=nvcr.io/nvidia/tritonserver:20.12-py3-sdk
|
||||
FROM ${TRITON_CLIENT_IMAGE_NAME} as triton-client
|
||||
FROM ${FROM_IMAGE_NAME}
|
||||
|
||||
ADD requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
# Install perf_client required library
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libb64-dev libb64-0d && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ADD . /workspace/rn50v15_tf
|
||||
# Install Triton Client PythonAPI and copy Perf Client
|
||||
COPY --from=triton-client /workspace/install/ /workspace/install/
|
||||
ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}
|
||||
RUN find /workspace/install/python/ -iname triton*manylinux*.whl -exec pip install {}[all] \;
|
||||
|
||||
# Set up environment variables to access Triton Client lib and bin
|
||||
ENV PATH /workspace/install/bin:${PATH}
|
||||
|
||||
ENV PYTHONPATH /workspace/rn50v15_tf
|
||||
WORKDIR /workspace/rn50v15_tf
|
||||
|
||||
RUN pip uninstall -y typing
|
||||
|
||||
ADD requirements.txt .
|
||||
ADD triton/requirements.txt triton/requirements.txt
|
||||
RUN pip install -r requirements.txt
|
||||
RUN pip install -r triton/requirements.txt
|
||||
|
||||
ADD . .
|
||||
|
|
|
@ -51,7 +51,7 @@ were averaged over an entire training epoch.
|
|||
The specific training script that was run is documented
|
||||
in the corresponding model's README.
|
||||
|
||||
The following table shows the training accuracy results of the
|
||||
The following table shows the training performance results of the
|
||||
three classification models side-by-side.
|
||||
|
||||
|
||||
|
@ -71,7 +71,7 @@ were averaged over an entire training epoch.
|
|||
The specific training script that was run is documented
|
||||
in the corresponding model's README.
|
||||
|
||||
The following table shows the training accuracy results of the
|
||||
The following table shows the training performance results of the
|
||||
three classification models side-by-side.
|
||||
|
||||
|
||||
|
|
436
TensorFlow/Classification/ConvNets/dataprep/build_image_data.py
Executable file
|
@ -0,0 +1,436 @@
|
|||
#!/usr/bin/python
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Converts image data to TFRecords file format with Example protos.
|
||||
|
||||
The image data set is expected to reside in JPEG files located in the
|
||||
following directory structure.
|
||||
|
||||
data_dir/label_0/image0.jpeg
|
||||
data_dir/label_0/image1.jpg
|
||||
...
|
||||
data_dir/label_1/weird-image.jpeg
|
||||
data_dir/label_1/my-image.jpeg
|
||||
...
|
||||
|
||||
where the sub-directory is the unique label associated with these images.
|
||||
|
||||
This TensorFlow script converts the training and evaluation data into
|
||||
a sharded data set consisting of TFRecord files
|
||||
|
||||
train_directory/train-00000-of-01024
|
||||
train_directory/train-00001-of-01024
|
||||
...
|
||||
train_directory/train-01023-of-01024
|
||||
|
||||
and
|
||||
|
||||
validation_directory/validation-00000-of-00128
|
||||
validation_directory/validation-00001-of-00128
|
||||
...
|
||||
validation_directory/validation-00127-of-00128
|
||||
|
||||
where we have selected 1024 and 128 shards for each data set. Each record
|
||||
within the TFRecord file is a serialized Example proto. The Example proto
|
||||
contains the following fields:
|
||||
|
||||
image/encoded: string containing JPEG encoded image in RGB colorspace
|
||||
image/height: integer, image height in pixels
|
||||
image/width: integer, image width in pixels
|
||||
image/colorspace: string, specifying the colorspace, always 'RGB'
|
||||
image/channels: integer, specifying the number of channels, always 3
|
||||
image/format: string, specifying the format, always 'JPEG'
|
||||
|
||||
image/filename: string containing the basename of the image file
|
||||
e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG'
|
||||
image/class/label: integer specifying the index in a classification layer.
|
||||
The label ranges from [0, num_labels] where 0 is unused and left as
|
||||
the background class.
|
||||
image/class/text: string specifying the human-readable version of the label
|
||||
e.g. 'dog'
|
||||
|
||||
If your data set involves bounding boxes, please look at build_imagenet_data.py.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from datetime import datetime
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import threading
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
# Input and output locations.
tf.app.flags.DEFINE_string(
    'train_directory', '/tmp/', 'Training data directory')
tf.app.flags.DEFINE_string(
    'validation_directory', '/tmp/', 'Validation data directory')
tf.app.flags.DEFINE_string(
    'output_directory', '/tmp/', 'Output data directory')

# Number of TFRecord files written for each data set.
tf.app.flags.DEFINE_integer(
    'train_shards', 2, 'Number of shards in training TFRecord files.')
tf.app.flags.DEFINE_integer(
    'validation_shards', 2, 'Number of shards in validation TFRecord files.')

tf.app.flags.DEFINE_integer(
    'num_threads', 2, 'Number of threads to preprocess the images.')

# The labels file holds the list of valid labels, one per line, e.g.:
#   dog
#   cat
#   flower
# Each label is mapped to an integer derived from its line position in the
# file.
tf.app.flags.DEFINE_string('labels_file', '', 'Labels file')

FLAGS = tf.app.flags.FLAGS
|
||||
|
||||
|
||||
def _int64_feature(value):
  """Wrap an integer, or a list of integers, in an int64 Feature proto.

  Args:
    value: a single value or a list of values; anything that is not a list
      is wrapped in a one-element list first.

  Returns:
    tf.train.Feature holding the values in its int64_list field.
  """
  values = value if isinstance(value, list) else [value]
  int64_list = tf.train.Int64List(value=values)
  return tf.train.Feature(int64_list=int64_list)
|
||||
|
||||
|
||||
def _bytes_feature(value):
  """Wrap a single bytes value in a bytes Feature proto.

  Args:
    value: the value to store; it is placed in a one-element list.

  Returns:
    tf.train.Feature holding the value in its bytes_list field.
  """
  bytes_list = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=bytes_list)
|
||||
|
||||
|
||||
def _convert_to_example(filename, image_buffer, label, text, height, width):
  """Assemble the Example proto describing one image.

  Args:
    filename: string, path to an image file, e.g., '/path/to/example.JPG'.
      Only its basename is stored.
    image_buffer: string, JPEG encoding of RGB image
    label: integer, identifier for the ground truth for the network
    text: string, unique human-readable, e.g. 'dog'
    height: integer, image height in pixels
    width: integer, image width in pixels

  Returns:
    Example proto
  """
  # Every image is recorded as a 3-channel RGB JPEG.
  colorspace = 'RGB'
  channels = 3
  image_format = 'JPEG'

  as_bytes = tf.compat.as_bytes
  basename = os.path.basename(filename)

  feature = {
      'image/height': _int64_feature(height),
      'image/width': _int64_feature(width),
      'image/colorspace': _bytes_feature(as_bytes(colorspace)),
      'image/channels': _int64_feature(channels),
      'image/class/label': _int64_feature(label),
      'image/class/text': _bytes_feature(as_bytes(text)),
      'image/format': _bytes_feature(as_bytes(image_format)),
      'image/filename': _bytes_feature(as_bytes(basename)),
      'image/encoded': _bytes_feature(as_bytes(image_buffer)),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
|
||||
|
||||
|
||||
class ImageCoder(object):
  """Bundles the TensorFlow ops used to transcode and decode images."""

  def __init__(self):
    # A single Session serves every coding call made through this object.
    self._sess = tf.Session()

    # PNG bytes in, RGB JPEG bytes out.
    self._png_data = tf.placeholder(dtype=tf.string)
    decoded_png = tf.image.decode_png(self._png_data, channels=3)
    self._png_to_jpeg = tf.image.encode_jpeg(
        decoded_png, format='rgb', quality=100)

    # JPEG bytes in, decoded 3-channel image out.
    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
    self._decode_jpeg = tf.image.decode_jpeg(
        self._decode_jpeg_data, channels=3)

  def png_to_jpeg(self, image_data):
    """Run the PNG -> JPEG op on `image_data` and return its result."""
    feed = {self._png_data: image_data}
    return self._sess.run(self._png_to_jpeg, feed_dict=feed)

  def decode_jpeg(self, image_data):
    """Decode JPEG `image_data`; asserts the result is rank 3 with 3 channels."""
    feed = {self._decode_jpeg_data: image_data}
    pixels = self._sess.run(self._decode_jpeg, feed_dict=feed)
    assert len(pixels.shape) == 3
    assert pixels.shape[2] == 3
    return pixels
|
||||
|
||||
|
||||
def _is_png(filename):
|
||||
"""Determine if a file contains a PNG format image.
|
||||
|
||||
Args:
|
||||
filename: string, path of the image file.
|
||||
|
||||
Returns:
|
||||
boolean indicating if the image is a PNG.
|
||||
"""
|
||||
return filename.endswith('.png')
|
||||
|
||||
|
||||
def _process_image(filename, coder):
  """Load one image file and report its JPEG bytes and dimensions.

  Args:
    filename: string, path to an image file e.g., '/path/to/example.JPG'.
    coder: instance of ImageCoder to provide TensorFlow image coding utils.

  Returns:
    image_buffer: string, JPEG encoding of RGB image.
    height: integer, image height in pixels.
    width: integer, image width in pixels.
  """
  with tf.gfile.FastGFile(filename, 'rb') as image_file:
    image_data = image_file.read()

  # PNG inputs are re-encoded so that everything stored is JPEG.
  if _is_png(filename):
    print('Converting PNG to JPEG for %s' % filename)
    image_data = coder.png_to_jpeg(image_data)

  # Decoding yields the pixel array from which the dimensions are read.
  image = coder.decode_jpeg(image_data)

  # The decoded array must be (height, width, 3).
  assert len(image.shape) == 3
  height, width, num_channels = image.shape
  assert num_channels == 3

  return image_data, height, width
|
||||
|
||||
|
||||
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               texts, labels, num_shards):
  """Processes and saves list of images as TFRecord in 1 thread.

  Images that fail to process are reported on stdout and skipped.

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    thread_index: integer, unique batch to run index is within [0, len(ranges)).
    ranges: list of pairs of integers specifying ranges of each batches to
      analyze in parallel.
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    texts: list of strings; each string is human readable, e.g. 'dog'
    labels: list of integer; each integer identifies the ground truth
    num_shards: integer number of shards for this data set.
  """
  # Each thread produces N shards where N = num_shards // num_threads.
  # For instance, if num_shards = 128, and the num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = num_shards // num_threads

  range_start = ranges[thread_index][0]
  range_end = ranges[thread_index][1]
  shard_ranges = np.linspace(range_start, range_end,
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = range_end - range_start

  counter = 0
  for s in range(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    # The context manager closes the writer even if an unexpected error
    # escapes the loop, so the shard file is not left open.
    with tf.python_io.TFRecordWriter(output_file) as writer:
      for i in files_in_shard:
        filename = filenames[i]
        label = labels[i]
        text = texts[i]

        # Best effort: a file that cannot be processed is logged and skipped
        # rather than aborting the whole thread.
        try:
          image_buffer, height, width = _process_image(filename, coder)
        except Exception as e:
          print(e)
          print('SKIPPED: Unexpected error while decoding %s.' % filename)
          continue

        example = _convert_to_example(filename, image_buffer, label,
                                      text, height, width)
        writer.write(example.SerializeToString())
        shard_counter += 1
        counter += 1

        if not counter % 1000:
          print('%s [thread %d]: Processed %d of %d images in thread batch.' %
                (datetime.now(), thread_index, counter, num_files_in_thread))
          sys.stdout.flush()

    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()

  # Report the number of shards, not the number of input files.
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_shards_per_batch))
  sys.stdout.flush()
|
||||
|
||||
|
||||
def _process_image_files(name, filenames, texts, labels, num_shards):
  """Process and save list of images as TFRecord of Example protos.

  The files are split into FLAGS.num_threads contiguous ranges and each range
  is handed to its own thread running _process_image_files_batch.

  Args:
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    texts: list of strings; each string is human readable, e.g. 'dog'
    labels: list of integer; each integer identifies the ground truth
    num_shards: integer number of shards for this data set.
  """
  assert len(filenames) == len(texts)
  assert len(filenames) == len(labels)

  # Break all images into batches with a [ranges[i][0], ranges[i][1]].
  # The builtin int is used because the np.int alias was removed from NumPy.
  spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(int)
  ranges = [[spacing[i], spacing[i + 1]] for i in range(len(spacing) - 1)]

  # Launch a thread for each batch.
  print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))
  sys.stdout.flush()

  # Create a mechanism for monitoring when all threads are finished.
  coord = tf.train.Coordinator()

  # Create a generic TensorFlow-based utility for converting all image codings.
  coder = ImageCoder()

  threads = []
  for thread_index in range(len(ranges)):
    args = (coder, thread_index, ranges, name, filenames,
            texts, labels, num_shards)
    t = threading.Thread(target=_process_image_files_batch, args=args)
    t.start()
    threads.append(t)

  # Wait for all the threads to terminate.
  coord.join(threads)
  print('%s: Finished writing all %d images in data set.' %
        (datetime.now(), len(filenames)))
  sys.stdout.flush()
|
||||
|
||||
|
||||
def _find_image_files(data_dir, labels_file):
  """Build a list of all images files and labels in the data set.

  Args:
    data_dir: string, path to the root directory of images.

      Assumes that the image data set resides in JPEG files located in
      the following directory structure.

        data_dir/dog/another-image.JPEG
        data_dir/dog/my-image.jpg

      where 'dog' is the label associated with these images.

    labels_file: string, path to the labels file.

      The list of valid labels are held in this file. Assumes that the file
      contains entries as such:
        dog
        cat
        flower
      where each line corresponds to a label. Labels are numbered in file
      order starting from 1; index 0 is left empty as a background class.

  Returns:
    filenames: list of strings; each string is a path to an image file.
    texts: list of strings; each string is the class, e.g. 'dog'
    labels: list of integer; each integer identifies the ground truth.
  """
  print('Determining list of input files and labels from %s.' % data_dir)
  # Read the labels inside a context manager so the file handle is closed.
  with tf.gfile.FastGFile(labels_file, 'r') as f:
    unique_labels = [line.strip() for line in f.readlines()]

  labels = []
  filenames = []
  texts = []

  # Construct the list of JPEG files and labels. Numbering starts at 1
  # because label index 0 is left empty as a background class.
  for label_index, text in enumerate(unique_labels, 1):
    jpeg_file_path = '%s/%s/*' % (data_dir, text)
    matching_files = tf.gfile.Glob(jpeg_file_path)

    labels.extend([label_index] * len(matching_files))
    texts.extend([text] * len(matching_files))
    filenames.extend(matching_files)

    if not label_index % 100:
      # Report progress against the number of classes, not of files found.
      print('Finished finding files in %d of %d classes.' % (
          label_index, len(unique_labels)))

  # Shuffle the ordering of all image files in order to guarantee
  # random ordering of the images with respect to label in the
  # saved TFRecord files. Make the randomization repeatable.
  shuffled_index = list(range(len(filenames)))
  random.seed(12345)
  random.shuffle(shuffled_index)

  filenames = [filenames[i] for i in shuffled_index]
  texts = [texts[i] for i in shuffled_index]
  labels = [labels[i] for i in shuffled_index]

  print('Found %d JPEG files across %d labels inside %s.' %
        (len(filenames), len(unique_labels), data_dir))
  return filenames, texts, labels
|
||||
|
||||
|
||||
def _process_dataset(name, directory, num_shards, labels_file):
  """Locate a data set's images and write them out as TFRecord shards.

  Args:
    name: string, unique identifier specifying the data set.
    directory: string, root path to the data set.
    num_shards: integer number of shards for this data set.
    labels_file: string, path to the labels file.
  """
  found = _find_image_files(directory, labels_file)
  filenames, texts, labels = found
  _process_image_files(name, filenames, texts, labels, num_shards)
|
||||
|
||||
|
||||
def main(unused_argv):
  """Convert the validation set, then the training set, to TFRecords.

  Both shard counts must be divisible by FLAGS.num_threads.
  """
  num_threads = FLAGS.num_threads
  assert not FLAGS.train_shards % num_threads, (
      'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')
  assert not FLAGS.validation_shards % num_threads, (
      'Please make the FLAGS.num_threads commensurate with '
      'FLAGS.validation_shards')
  print('Saving results to %s' % FLAGS.output_directory)

  # (data set name, input directory, shard count), in processing order.
  datasets = (
      ('validation', FLAGS.validation_directory, FLAGS.validation_shards),
      ('train', FLAGS.train_directory, FLAGS.train_shards),
  )
  for name, directory, num_shards in datasets:
    _process_dataset(name, directory, num_shards, FLAGS.labels_file)


if __name__ == '__main__':
  tf.app.run()
|
|
@ -0,0 +1,707 @@
|
|||
#!/usr/bin/python
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Converts ImageNet data to TFRecords file format with Example protos.
|
||||
|
||||
The raw ImageNet data set is expected to reside in JPEG files located in the
|
||||
following directory structure.
|
||||
|
||||
data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
|
||||
data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
|
||||
...
|
||||
|
||||
where 'n01440764' is the unique synset label associated with
|
||||
these images.
|
||||
|
||||
The training data set consists of 1000 sub-directories (i.e. labels)
|
||||
each containing 1200 JPEG images for a total of 1.2M JPEG images.
|
||||
|
||||
The evaluation data set consists of 1000 sub-directories (i.e. labels)
|
||||
each containing 50 JPEG images for a total of 50K JPEG images.
|
||||
|
||||
This TensorFlow script converts the training and evaluation data into
|
||||
a sharded data set consisting of 1024 and 128 TFRecord files, respectively.
|
||||
|
||||
train_directory/train-00000-of-01024
|
||||
train_directory/train-00001-of-01024
|
||||
...
|
||||
train_directory/train-01023-of-01024
|
||||
|
||||
and
|
||||
|
||||
validation_directory/validation-00000-of-00128
|
||||
validation_directory/validation-00001-of-00128
|
||||
...
|
||||
validation_directory/validation-00127-of-00128
|
||||
|
||||
Each validation TFRecord file contains ~390 records. Each training TFRecord
|
||||
file contains ~1250 records. Each record within the TFRecord file is a
|
||||
serialized Example proto. The Example proto contains the following fields:
|
||||
|
||||
image/encoded: string containing JPEG encoded image in RGB colorspace
|
||||
image/height: integer, image height in pixels
|
||||
image/width: integer, image width in pixels
|
||||
image/colorspace: string, specifying the colorspace, always 'RGB'
|
||||
image/channels: integer, specifying the number of channels, always 3
|
||||
image/format: string, specifying the format, always 'JPEG'
|
||||
|
||||
image/filename: string containing the basename of the image file
|
||||
e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG'
|
||||
image/class/label: integer specifying the index in a classification layer.
|
||||
The label ranges from [1, 1000] where 0 is not used.
|
||||
image/class/synset: string specifying the unique ID of the label,
|
||||
e.g. 'n01440764'
|
||||
image/class/text: string specifying the human-readable version of the label
|
||||
e.g. 'red fox, Vulpes vulpes'
|
||||
|
||||
image/object/bbox/xmin: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/xmax: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/ymin: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/ymax: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/label: integer specifying the index in a classification
|
||||
layer. The label ranges from [1, 1000] where 0 is not used. Note this is
|
||||
always identical to the image label.
|
||||
|
||||
Note that the length of xmin is identical to the length of xmax, ymin and ymax
|
||||
for each example.
|
||||
|
||||
Running this script using 16 threads may take around ~2.5 hours on an HP Z420.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from datetime import datetime
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import threading
|
||||
|
||||
import numpy as np
|
||||
import six
|
||||
import tensorflow as tf
|
||||
|
||||
# Input and output locations.
tf.app.flags.DEFINE_string(
    'train_directory', '/tmp/', 'Training data directory')
tf.app.flags.DEFINE_string(
    'validation_directory', '/tmp/', 'Validation data directory')
tf.app.flags.DEFINE_string(
    'output_directory', '/tmp/', 'Output data directory')

# Number of TFRecord files written for each data set.
tf.app.flags.DEFINE_integer(
    'train_shards', 1024, 'Number of shards in training TFRecord files.')
tf.app.flags.DEFINE_integer(
    'validation_shards', 128,
    'Number of shards in validation TFRecord files.')

tf.app.flags.DEFINE_integer(
    'num_threads', 8, 'Number of threads to preprocess the images.')

# The labels file holds the list of valid labels, one synset per line, e.g.:
#   n01440764
#   n01443537
#   n01484850
# Each synset contained in the file is mapped to an integer (based on the
# alphabetical ordering). See below for details.
tf.app.flags.DEFINE_string(
    'labels_file', 'imagenet_lsvrc_2015_synsets.txt', 'Labels file')

# This file contains the mapping from synset to human-readable label.
# Each line of the file is assumed to look like:
#
#   n02119247    black fox
#   n02119359    silver fox
#   n02119477    red fox, Vulpes fulva
#
# where each line corresponds to a unique mapping. Note that each line is
# formatted as <synset>\t<human readable label>.
tf.app.flags.DEFINE_string(
    'imagenet_metadata_file', 'imagenet_metadata.txt',
    'ImageNet metadata file')

# This file is the output of process_bounding_box.py
# Each line of the file is assumed to look like:
#
#   n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940
#
# where each line corresponds to one bounding box annotation associated
# with an image. Each line can be parsed as:
#
#   <JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax>
#
# Note that there might exist multiple bounding box annotations associated
# with an image file.
tf.app.flags.DEFINE_string(
    'bounding_box_file', './imagenet_2012_bounding_boxes.csv',
    'Bounding box file')

FLAGS = tf.app.flags.FLAGS
|
||||
|
||||
|
||||
def _int64_feature(value):
  """Wrap an integer, or a list of integers, in an int64 Feature proto.

  Args:
    value: a single value or a list of values; anything that is not a list
      is wrapped in a one-element list first.

  Returns:
    tf.train.Feature holding the values in its int64_list field.
  """
  values = value if isinstance(value, list) else [value]
  int64_list = tf.train.Int64List(value=values)
  return tf.train.Feature(int64_list=int64_list)
|
||||
|
||||
|
||||
def _float_feature(value):
  """Wrap a float, or a list of floats, in a float Feature proto.

  Args:
    value: a single value or a list of values; anything that is not a list
      is wrapped in a one-element list first.

  Returns:
    tf.train.Feature holding the values in its float_list field.
  """
  values = value if isinstance(value, list) else [value]
  float_list = tf.train.FloatList(value=values)
  return tf.train.Feature(float_list=float_list)
|
||||
|
||||
|
||||
def _bytes_feature(value):
  """Wrap a single value in a bytes Feature proto.

  On Python 3, text strings are UTF-8 encoded first; any other value is
  stored as given.

  Args:
    value: the value to store; it is placed in a one-element list.

  Returns:
    tf.train.Feature holding the value in its bytes_list field.
  """
  is_py3_text = six.PY3 and isinstance(value, six.text_type)
  if is_py3_text:
    value = value.encode('utf-8')
  bytes_list = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=bytes_list)
|
||||
|
||||
|
||||
def _convert_to_example(filename, image_buffer, label, synset, human, bbox,
                        height, width):
  """Build an Example proto for an example.

  Args:
    filename: string, path to an image file, e.g., '/path/to/example.JPG'.
      Only its basename is stored.
    image_buffer: string, JPEG encoding of RGB image
    label: integer, identifier for the ground truth for the network
    synset: string, unique WordNet ID specifying the label, e.g., 'n02323233'
    human: string, human-readable label, e.g., 'red fox, Vulpes vulpes'
    bbox: list of bounding boxes; each box is a sequence of four coordinates
      specifying [xmin, ymin, xmax, ymax], which are stored as float
      features. All boxes are assumed to belong to the same label as the
      image label.
    height: integer, image height in pixels
    width: integer, image width in pixels

  Returns:
    Example proto
  """
  # Split the boxes into one list per coordinate, as the proto stores them.
  xmin = []
  ymin = []
  xmax = []
  ymax = []
  for box in bbox:
    assert len(box) == 4
    box_xmin, box_ymin, box_xmax, box_ymax = box
    xmin.append(box_xmin)
    ymin.append(box_ymin)
    xmax.append(box_xmax)
    ymax.append(box_ymax)

  # Every image is recorded as a 3-channel RGB JPEG.
  colorspace = 'RGB'
  channels = 3
  image_format = 'JPEG'

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': _int64_feature(height),
      'image/width': _int64_feature(width),
      'image/colorspace': _bytes_feature(colorspace),
      'image/channels': _int64_feature(channels),
      'image/class/label': _int64_feature(label),
      'image/class/synset': _bytes_feature(synset),
      'image/class/text': _bytes_feature(human),
      'image/object/bbox/xmin': _float_feature(xmin),
      'image/object/bbox/xmax': _float_feature(xmax),
      'image/object/bbox/ymin': _float_feature(ymin),
      'image/object/bbox/ymax': _float_feature(ymax),
      # One label entry per bounding box, always equal to the image label.
      'image/object/bbox/label': _int64_feature([label] * len(xmin)),
      'image/format': _bytes_feature(image_format),
      'image/filename': _bytes_feature(os.path.basename(filename)),
      'image/encoded': _bytes_feature(image_buffer)}))
  return example
|
||||
|
||||
|
||||
class ImageCoder(object):
  """Bundles the TensorFlow ops used to transcode and decode images."""

  def __init__(self):
    # A single Session serves every coding call made through this object.
    self._sess = tf.Session()

    # PNG bytes in, RGB JPEG bytes out.
    self._png_data = tf.placeholder(dtype=tf.string)
    decoded_png = tf.image.decode_png(self._png_data, channels=3)
    self._png_to_jpeg = tf.image.encode_jpeg(
        decoded_png, format='rgb', quality=100)

    # CMYK JPEG bytes in, RGB JPEG bytes out.
    self._cmyk_data = tf.placeholder(dtype=tf.string)
    decoded_cmyk = tf.image.decode_jpeg(self._cmyk_data, channels=0)
    self._cmyk_to_rgb = tf.image.encode_jpeg(
        decoded_cmyk, format='rgb', quality=100)

    # JPEG bytes in, decoded 3-channel image out.
    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
    self._decode_jpeg = tf.image.decode_jpeg(
        self._decode_jpeg_data, channels=3)

  def png_to_jpeg(self, image_data):
    """Run the PNG -> JPEG op on `image_data` and return its result."""
    feed = {self._png_data: image_data}
    return self._sess.run(self._png_to_jpeg, feed_dict=feed)

  def cmyk_to_rgb(self, image_data):
    """Run the CMYK -> RGB JPEG op on `image_data` and return its result."""
    feed = {self._cmyk_data: image_data}
    return self._sess.run(self._cmyk_to_rgb, feed_dict=feed)

  def decode_jpeg(self, image_data):
    """Decode JPEG `image_data`; asserts the result is rank 3 with 3 channels."""
    feed = {self._decode_jpeg_data: image_data}
    pixels = self._sess.run(self._decode_jpeg, feed_dict=feed)
    assert len(pixels.shape) == 3
    assert pixels.shape[2] == 3
    return pixels
|
||||
|
||||
|
||||
def _is_png(filename):
|
||||
"""Determine if a file contains a PNG format image.
|
||||
|
||||
Args:
|
||||
filename: string, path of the image file.
|
||||
|
||||
Returns:
|
||||
boolean indicating if the image is a PNG.
|
||||
"""
|
||||
# File list from:
|
||||
# https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU
|
||||
return 'n02105855_2933.JPEG' in filename
|
||||
|
||||
|
||||
def _is_cmyk(filename):
|
||||
"""Determine if file contains a CMYK JPEG format image.
|
||||
|
||||
Args:
|
||||
filename: string, path of the image file.
|
||||
|
||||
Returns:
|
||||
boolean indicating if the image is a JPEG encoded with CMYK color space.
|
||||
"""
|
||||
# File list from:
|
||||
# https://github.com/cytsai/ilsvrc-cmyk-image-list
|
||||
blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG',
|
||||
'n02447366_23489.JPEG', 'n02492035_15739.JPEG',
|
||||
'n02747177_10752.JPEG', 'n03018349_4028.JPEG',
|
||||
'n03062245_4620.JPEG', 'n03347037_9675.JPEG',
|
||||
'n03467068_12171.JPEG', 'n03529860_11437.JPEG',
|
||||
'n03544143_17228.JPEG', 'n03633091_5218.JPEG',
|
||||
'n03710637_5125.JPEG', 'n03961711_5286.JPEG',
|
||||
'n04033995_2932.JPEG', 'n04258138_17003.JPEG',
|
||||
'n04264628_27969.JPEG', 'n04336792_7448.JPEG',
|
||||
'n04371774_5854.JPEG', 'n04596742_4225.JPEG',
|
||||
'n07583066_647.JPEG', 'n13037406_4650.JPEG']
|
||||
return filename.split('/')[-1] in blacklist
|
||||
|
||||
|
||||
def _process_image(filename, coder):
    """Load one image file and return its JPEG bytes plus dimensions.

    Args:
      filename: string, path to an image file e.g., '/path/to/example.JPG'.
      coder: instance of ImageCoder to provide TensorFlow image coding utils.

    Returns:
      image_buffer: string, JPEG encoding of RGB image.
      height: integer, image height in pixels.
      width: integer, image width in pixels.
    """
    with tf.gfile.FastGFile(filename, 'rb') as image_file:
        encoded = image_file.read()

    # A handful of known files are not RGB JPEGs; re-encode those first.
    if _is_png(filename):
        print('Converting PNG to JPEG for %s' % filename)
        encoded = coder.png_to_jpeg(encoded)
    elif _is_cmyk(filename):
        print('Converting CMYK to RGB for %s' % filename)
        encoded = coder.cmyk_to_rgb(encoded)

    # Decoding is only needed to validate the image and read its size; the
    # (possibly re-encoded) JPEG bytes are what gets returned.
    pixels = coder.decode_jpeg(encoded)
    assert len(pixels.shape) == 3
    assert pixels.shape[2] == 3
    height, width = pixels.shape[0], pixels.shape[1]

    return encoded, height, width
|
||||
|
||||
|
||||
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               synsets, labels, humans, bboxes, num_shards):
    """Processes and saves list of images as TFRecord in 1 thread.

    Args:
      coder: instance of ImageCoder to provide TensorFlow image coding utils.
      thread_index: integer, unique batch to run index is within
        [0, len(ranges)).
      ranges: list of pairs of integers specifying ranges of each batches to
        analyze in parallel.
      name: string, unique identifier specifying the data set
      filenames: list of strings; each string is a path to an image file
      synsets: list of strings; each string is a unique WordNet ID
      labels: list of integer; each integer identifies the ground truth
      humans: list of strings; each string is a human-readable label
      bboxes: list of bounding boxes for each image. Note that each entry in
        this list might contain from 0+ entries corresponding to the number of
        bounding box annotations for the image.
      num_shards: integer number of shards for this data set.
    """
    # Each thread produces N shards where N = num_shards / num_threads.
    # For instance, if num_shards = 128, and the num_threads = 2, then the
    # first thread would produce shards [0, 64).
    num_threads = len(ranges)
    assert not num_shards % num_threads
    num_shards_per_batch = num_shards // num_threads

    batch_start = ranges[thread_index][0]
    batch_end = ranges[thread_index][1]
    shard_ranges = np.linspace(batch_start, batch_end,
                               num_shards_per_batch + 1).astype(int)
    num_files_in_thread = batch_end - batch_start

    counter = 0
    for s in range(num_shards_per_batch):
        # Generate a sharded version of the file name, e.g.
        # 'train-00002-of-00010'
        shard = thread_index * num_shards_per_batch + s
        output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
        output_file = os.path.join(FLAGS.output_directory, output_filename)
        writer = tf.python_io.TFRecordWriter(output_file)

        shard_counter = 0
        files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1],
                                   dtype=int)
        # Close the writer even when an image fails to process, so the shard
        # file handle is not leaked.
        try:
            for i in files_in_shard:
                filename = filenames[i]
                image_buffer, height, width = _process_image(filename, coder)
                example = _convert_to_example(filename, image_buffer,
                                              labels[i], synsets[i], humans[i],
                                              bboxes[i], height, width)
                writer.write(example.SerializeToString())
                shard_counter += 1
                counter += 1

                if not counter % 1000:
                    print('%s [thread %d]: Processed %d of %d images in thread batch.' %
                          (datetime.now(), thread_index, counter,
                           num_files_in_thread))
                    sys.stdout.flush()
        finally:
            writer.close()

        print('%s [thread %d]: Wrote %d images to %s' %
              (datetime.now(), thread_index, shard_counter, output_file))
        sys.stdout.flush()

    # Report the number of shards written by this thread (the original passed
    # the file count here, which made the message wrong).
    print('%s [thread %d]: Wrote %d images to %d shards.' %
          (datetime.now(), thread_index, counter, num_shards_per_batch))
    sys.stdout.flush()
|
||||
|
||||
|
||||
def _process_image_files(name, filenames, synsets, labels, humans,
                         bboxes, num_shards):
    """Process and save list of images as TFRecord of Example protos.

    The images are split into FLAGS.num_threads contiguous ranges and one
    worker thread is started per range; this call returns after all workers
    have been joined.

    Args:
      name: string, unique identifier specifying the data set
      filenames: list of strings; each string is a path to an image file
      synsets: list of strings; each string is a unique WordNet ID
      labels: list of integer; each integer identifies the ground truth
      humans: list of strings; each string is a human-readable label
      bboxes: list of bounding boxes for each image. Note that each entry in
        this list might contain from 0+ entries corresponding to the number of
        bounding box annotations for the image.
      num_shards: integer number of shards for this data set.
    """
    assert len(filenames) == len(synsets)
    assert len(filenames) == len(labels)
    assert len(filenames) == len(humans)
    assert len(filenames) == len(bboxes)

    # Break all images into batches with a [ranges[i][0], ranges[i][1]].
    # The builtin `int` is used because the `np.int` alias is deprecated and
    # absent from recent NumPy releases.
    spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(int)
    ranges = [[spacing[i], spacing[i + 1]] for i in range(len(spacing) - 1)]

    # Launch a thread for each batch.
    print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))
    sys.stdout.flush()

    # Create a mechanism for monitoring when all threads are finished.
    coord = tf.train.Coordinator()

    # Create a generic TensorFlow-based utility for converting all image
    # codings. The same instance is handed to every worker thread.
    coder = ImageCoder()

    threads = []
    for thread_index in range(len(ranges)):
        args = (coder, thread_index, ranges, name, filenames,
                synsets, labels, humans, bboxes, num_shards)
        t = threading.Thread(target=_process_image_files_batch, args=args)
        t.start()
        threads.append(t)

    # Wait for all the threads to terminate.
    coord.join(threads)
    print('%s: Finished writing all %d images in data set.' %
          (datetime.now(), len(filenames)))
    sys.stdout.flush()
|
||||
|
||||
|
||||
def _find_image_files(data_dir, labels_file):
    """Build a list of all images files and labels in the data set.

    Args:
      data_dir: string, path to the root directory of images.

        Assumes that the ImageNet data set resides in JPEG files located in
        the following directory structure.

          data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
          data_dir/n01440764/ILSVRC2012_val_00000543.JPEG

        where 'n01440764' is the unique synset label associated with these
        images.

      labels_file: string, path to the labels file.

        The list of valid labels are held in this file. Assumes that the file
        contains entries as such:
          n01440764
          n01443537
          n01484850
        where each line corresponds to a label expressed as a synset. Each
        synset is mapped to an integer following the order of the lines in
        the file, starting with the integer 1 for the synset on the first
        line.

        The reason we start the integer labels at 1 is to reserve label 0 as
        an unused background class.

    Returns:
      filenames: list of strings; each string is a path to an image file.
      synsets: list of strings; each string is a unique WordNet ID.
      labels: list of integer; each integer identifies the ground truth.
    """
    print('Determining list of input files and labels from %s.' % data_dir)
    # Read through a context manager so the labels file handle is closed.
    with tf.gfile.FastGFile(labels_file, 'r') as f:
        challenge_synsets = [line.strip() for line in f.readlines()]

    labels = []
    filenames = []
    synsets = []

    # Construct the list of JPEG files and labels. Label index 0 is left
    # empty as a background class, so numbering starts at 1.
    for label_index, synset in enumerate(challenge_synsets, 1):
        jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset)
        matching_files = tf.gfile.Glob(jpeg_file_path)

        labels.extend([label_index] * len(matching_files))
        synsets.extend([synset] * len(matching_files))
        filenames.extend(matching_files)

        if not label_index % 100:
            print('Finished finding files in %d of %d classes.' % (
                label_index, len(challenge_synsets)))

    # Shuffle the ordering of all image files in order to guarantee
    # random ordering of the images with respect to label in the
    # saved TFRecord files. Make the randomization repeatable.
    shuffled_index = list(range(len(filenames)))
    random.seed(12345)
    random.shuffle(shuffled_index)

    filenames = [filenames[i] for i in shuffled_index]
    synsets = [synsets[i] for i in shuffled_index]
    labels = [labels[i] for i in shuffled_index]

    print('Found %d JPEG files across %d labels inside %s.' %
          (len(filenames), len(challenge_synsets), data_dir))
    return filenames, synsets, labels
|
||||
|
||||
|
||||
def _find_human_readable_labels(synsets, synset_to_human):
|
||||
"""Build a list of human-readable labels.
|
||||
|
||||
Args:
|
||||
synsets: list of strings; each string is a unique WordNet ID.
|
||||
synset_to_human: dict of synset to human labels, e.g.,
|
||||
'n02119022' --> 'red fox, Vulpes vulpes'
|
||||
|
||||
Returns:
|
||||
List of human-readable strings corresponding to each synset.
|
||||
"""
|
||||
humans = []
|
||||
for s in synsets:
|
||||
assert s in synset_to_human, ('Failed to find: %s' % s)
|
||||
humans.append(synset_to_human[s])
|
||||
return humans
|
||||
|
||||
|
||||
def _find_image_bounding_boxes(filenames, image_to_bboxes):
|
||||
"""Find the bounding boxes for a given image file.
|
||||
|
||||
Args:
|
||||
filenames: list of strings; each string is a path to an image file.
|
||||
image_to_bboxes: dictionary mapping image file names to a list of
|
||||
bounding boxes. This list contains 0+ bounding boxes.
|
||||
Returns:
|
||||
List of bounding boxes for each image. Note that each entry in this
|
||||
list might contain from 0+ entries corresponding to the number of bounding
|
||||
box annotations for the image.
|
||||
"""
|
||||
num_image_bbox = 0
|
||||
bboxes = []
|
||||
for f in filenames:
|
||||
basename = os.path.basename(f)
|
||||
if basename in image_to_bboxes:
|
||||
bboxes.append(image_to_bboxes[basename])
|
||||
num_image_bbox += 1
|
||||
else:
|
||||
bboxes.append([])
|
||||
print('Found %d images with bboxes out of %d images' % (
|
||||
num_image_bbox, len(filenames)))
|
||||
return bboxes
|
||||
|
||||
|
||||
def _process_dataset(name, directory, num_shards, synset_to_human,
                     image_to_bboxes):
    """Convert one complete data set into sharded TFRecord files.

    Args:
      name: string, unique identifier specifying the data set.
      directory: string, root path to the data set.
      num_shards: integer number of shards for this data set.
      synset_to_human: dict of synset to human labels, e.g.,
        'n02119022' --> 'red fox, Vulpes vulpes'
      image_to_bboxes: dictionary mapping image file names to a list of
        bounding boxes. This list contains 0+ bounding boxes.
    """
    image_paths, synsets, labels = _find_image_files(directory,
                                                     FLAGS.labels_file)
    # Per-image metadata is derived in the same order as `image_paths`.
    _process_image_files(
        name, image_paths, synsets, labels,
        _find_human_readable_labels(synsets, synset_to_human),
        _find_image_bounding_boxes(image_paths, image_to_bboxes),
        num_shards)
|
||||
|
||||
|
||||
def _build_synset_lookup(imagenet_metadata_file):
    """Build lookup for synset to human-readable label.

    Args:
      imagenet_metadata_file: string, path to file containing mapping from
        synset to human-readable label.

        Assumes each line of the file looks like:

          n02119247    black fox
          n02119359    silver fox
          n02119477    red fox, Vulpes fulva

        where each line corresponds to a unique mapping. Note that each line
        is formatted as <synset>\\t<human readable label>. Blank lines are
        skipped.

    Returns:
      Dictionary of synset to human labels, such as:
        'n02119022' --> 'red fox, Vulpes vulpes'

    Raises:
      AssertionError: if a non-blank line does not split into exactly two
        tab-separated fields.
    """
    # Read through a context manager so the file handle is closed.
    with tf.gfile.FastGFile(imagenet_metadata_file, 'r') as f:
        lines = f.readlines()

    synset_to_human = {}
    for line in lines:
        # readlines() keeps the trailing newline, so the line has to be
        # stripped before it can be recognised as blank.
        stripped = line.strip()
        if not stripped:
            continue
        parts = stripped.split('\t')
        assert len(parts) == 2, ('Failed to parse: %s' % line)
        synset, human = parts
        synset_to_human[synset] = human
    return synset_to_human
|
||||
|
||||
|
||||
def _build_bounding_box_lookup(bounding_box_file):
    """Build a lookup from image file to bounding boxes.

    Args:
      bounding_box_file: string, path to file with bounding boxes annotations.

        Assumes each line of the file looks like:

          n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940

        where each line corresponds to one bounding box annotation associated
        with an image. Each line can be parsed as:

          <JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax>

        Note that there might exist multiple bounding box annotations
        associated with an image file. This file is the output of
        process_bounding_boxes.py. Blank lines are skipped.

    Returns:
      Dictionary mapping image file names to a list of bounding boxes. This
      list contains 0+ bounding boxes.

    Raises:
      AssertionError: if a non-blank line does not have exactly five
        comma-separated fields.
    """
    # Read through a context manager so the file handle is closed.
    with tf.gfile.FastGFile(bounding_box_file, 'r') as f:
        lines = f.readlines()

    images_to_bboxes = {}
    num_bbox = 0
    for line in lines:
        # readlines() keeps the trailing newline, so a blank line is only
        # detectable after stripping.
        if not line.strip():
            continue
        parts = line.split(',')
        assert len(parts) == 5, ('Failed to parse: %s' % line)
        filename = parts[0]
        # float() tolerates the trailing newline left on the last field.
        xmin, ymin, xmax, ymax = [float(part) for part in parts[1:]]
        images_to_bboxes.setdefault(filename, []).append(
            [xmin, ymin, xmax, ymax])
        num_bbox += 1

    print('Successfully read %d bounding boxes '
          'across %d images.' % (num_bbox, len(images_to_bboxes)))
    return images_to_bboxes
|
||||
|
||||
|
||||
def main(unused_argv):
    """Convert the validation set and then the training set to TFRecords."""
    # Every worker thread writes the same number of shards, so both shard
    # counts have to be divisible by the thread count.
    assert FLAGS.train_shards % FLAGS.num_threads == 0, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')
    assert FLAGS.validation_shards % FLAGS.num_threads == 0, (
        'Please make the FLAGS.num_threads commensurate with '
        'FLAGS.validation_shards')
    print('Saving results to %s' % FLAGS.output_directory)

    # Lookup tables shared by both data sets.
    synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file)
    image_to_bboxes = _build_bounding_box_lookup(FLAGS.bounding_box_file)

    # Validation is converted first, followed by training.
    datasets = (
        ('validation', FLAGS.validation_directory, FLAGS.validation_shards),
        ('train', FLAGS.train_directory, FLAGS.train_shards),
    )
    for dataset_name, directory, shard_count in datasets:
        _process_dataset(dataset_name, directory, shard_count,
                         synset_to_human, image_to_bboxes)


if __name__ == '__main__':
    tf.app.run()
|
|
@ -0,0 +1,618 @@
|
|||
#!/usr/bin/python
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Converts ImageNet data to TFRecords file format with Example protos.
|
||||
|
||||
The raw ImageNet data set is expected to reside in JPEG files located in the
|
||||
following directory structure.
|
||||
|
||||
data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
|
||||
data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
|
||||
...
|
||||
|
||||
where 'n01440764' is the unique synset label associated with
|
||||
these images.
|
||||
|
||||
The training data set consists of 1000 sub-directories (i.e. labels)
|
||||
each containing 1200 JPEG images for a total of 1.2M JPEG images.
|
||||
|
||||
The evaluation data set consists of 1000 sub-directories (i.e. labels)
|
||||
each containing 50 JPEG images for a total of 50K JPEG images.
|
||||
|
||||
This TensorFlow script converts the training and evaluation data into
|
||||
a sharded data set consisting of 1024 and 128 TFRecord files, respectively.
|
||||
|
||||
train_directory/train-00000-of-01024
|
||||
train_directory/train-00001-of-01024
|
||||
...
|
||||
train_directory/train-01023-of-01024
|
||||
|
||||
and
|
||||
|
||||
validation_directory/validation-00000-of-00128
|
||||
validation_directory/validation-00001-of-00128
|
||||
...
|
||||
validation_directory/validation-00127-of-00128
|
||||
|
||||
Each validation TFRecord file contains ~390 records. Each training TFRecord
|
||||
file contains ~1250 records. Each record within the TFRecord file is a
|
||||
serialized Example proto. The Example proto contains the following fields:
|
||||
|
||||
image/encoded: string containing JPEG encoded image in RGB colorspace
|
||||
image/height: integer, image height in pixels
|
||||
image/width: integer, image width in pixels
|
||||
image/colorspace: string, specifying the colorspace, always 'RGB'
|
||||
image/channels: integer, specifying the number of channels, always 3
|
||||
image/format: string, specifying the format, always 'JPEG'
|
||||
|
||||
image/filename: string containing the basename of the image file
|
||||
e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG'
|
||||
image/class/label: integer specifying the index in a classification layer.
|
||||
The label ranges from [1, 1000] where 0 is not used.
|
||||
image/class/synset: string specifying the unique ID of the label,
|
||||
e.g. 'n01440764'
|
||||
image/class/text: string specifying the human-readable version of the label
|
||||
e.g. 'red fox, Vulpes vulpes'
|
||||
|
||||
image/object/bbox/xmin: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/xmax: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/ymin: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/ymax: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/label: integer specifying the index in a classification
|
||||
layer. The label ranges from [1, 1000] where 0 is not used. Note this is
|
||||
always identical to the image label.
|
||||
|
||||
Note that the length of xmin is identical to the length of xmax, ymin and ymax
|
||||
for each example.
|
||||
|
||||
Running this script using 16 threads may take around ~2.5 hours on an HP Z420.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from datetime import datetime
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import threading
|
||||
|
||||
import numpy as np
|
||||
import six
|
||||
import tensorflow as tf
|
||||
|
||||
# Input and output locations.
tf.app.flags.DEFINE_string(
    'train_directory', '/tmp/', 'Training data directory')
tf.app.flags.DEFINE_string(
    'validation_directory', '/tmp/', 'Validation data directory')
tf.app.flags.DEFINE_string(
    'output_directory', '/tmp/', 'Output data directory')

# Number of output files per data set.
tf.app.flags.DEFINE_integer(
    'train_shards', 1024, 'Number of shards in training TFRecord files.')
tf.app.flags.DEFINE_integer(
    'validation_shards', 128, 'Number of shards in validation TFRecord files.')

tf.app.flags.DEFINE_integer(
    'num_threads', 8, 'Number of threads to preprocess the images.')

# The labels file holds the list of valid labels.
# Assumes that the file contains entries as such:
#   n01440764
#   n01443537
#   n01484850
# where each line corresponds to a label expressed as a synset. We map
# each synset contained in the file to an integer (based on the alphabetical
# ordering). See below for details.
tf.app.flags.DEFINE_string(
    'labels_file', 'imagenet_lsvrc_2015_synsets.txt', 'Labels file')

# This file contains the mapping from synset to human-readable label.
# Assumes each line of the file looks like:
#
#   n02119247    black fox
#   n02119359    silver fox
#   n02119477    red fox, Vulpes fulva
#
# where each line corresponds to a unique mapping. Note that each line is
# formatted as <synset>\t<human readable label>.
tf.app.flags.DEFINE_string(
    'imagenet_metadata_file', 'imagenet_metadata.txt',
    'ImageNet metadata file')

FLAGS = tf.app.flags.FLAGS
|
||||
|
||||
|
||||
def _int64_feature(value):
    """Wrap an int, or a list of ints, as an int64 Feature for an Example."""
    # A non-list value is treated as a single element.
    values = value if isinstance(value, list) else [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
|
||||
|
||||
|
||||
def _float_feature(value):
    """Wrap a float, or a list of floats, as a float Feature for an Example."""
    # A non-list value is treated as a single element.
    values = value if isinstance(value, list) else [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))
|
||||
|
||||
|
||||
def _bytes_feature(value):
    """Wrap a single value as a bytes Feature for an Example proto."""
    # On Python 3, text strings are UTF-8 encoded before being stored.
    needs_encoding = six.PY3 and isinstance(value, six.text_type)
    if needs_encoding:
        payload = six.binary_type(value, encoding='utf-8')
    else:
        payload = value
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[payload]))
|
||||
|
||||
|
||||
def _convert_to_example(filename, image_buffer, label, synset, human, bbox,
                        height, width):
    """Build an Example proto for an example.

    Args:
      filename: string, path to an image file, e.g., '/path/to/example.JPG'
      image_buffer: string, JPEG encoding of RGB image
      label: integer, identifier for the ground truth for the network
      synset: string, unique WordNet ID specifying the label, e.g., 'n02323233'
      human: string, human-readable label, e.g., 'red fox, Vulpes vulpes'
      bbox: list of bounding boxes; each box is a list of four numbers
        specifying [xmin, ymin, xmax, ymax], stored as float features. All
        boxes are assumed to belong to the same label as the image label.
      height: integer, image height in pixels
      width: integer, image width in pixels

    Returns:
      Example proto

    Raises:
      AssertionError: if a bounding box does not have exactly four entries.
    """
    # Split the per-box coordinates into one list per coordinate.
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for box in bbox:
        assert len(box) == 4
        box_xmin, box_ymin, box_xmax, box_ymax = box
        xmin.append(box_xmin)
        ymin.append(box_ymin)
        xmax.append(box_xmax)
        ymax.append(box_ymax)

    colorspace = 'RGB'
    channels = 3
    image_format = 'JPEG'

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': _int64_feature(height),
        'image/width': _int64_feature(width),
        'image/colorspace': _bytes_feature(colorspace),
        'image/channels': _int64_feature(channels),
        'image/class/label': _int64_feature(label),
        'image/class/synset': _bytes_feature(synset),
        'image/class/text': _bytes_feature(human),
        'image/object/bbox/xmin': _float_feature(xmin),
        'image/object/bbox/xmax': _float_feature(xmax),
        'image/object/bbox/ymin': _float_feature(ymin),
        'image/object/bbox/ymax': _float_feature(ymax),
        # Every box carries the image-level label.
        'image/object/bbox/label': _int64_feature([label] * len(xmin)),
        'image/format': _bytes_feature(image_format),
        # Only the base name is stored, not the full path.
        'image/filename': _bytes_feature(os.path.basename(filename)),
        'image/encoded': _bytes_feature(image_buffer)}))
    return example
|
||||
|
||||
|
||||
class ImageCoder(object):
    """Bundle of TensorFlow image transcoding ops sharing one session.

    The ops are built once in the constructor and then reused for every
    image, so callers pay graph construction cost a single time.
    """

    def __init__(self):
        # One Session serves every image coding call made through this object.
        self._sess = tf.Session()

        # PNG bytes -> RGB JPEG bytes.
        self._png_data = tf.placeholder(dtype=tf.string)
        png_pixels = tf.image.decode_png(self._png_data, channels=3)
        self._png_to_jpeg = tf.image.encode_jpeg(
            png_pixels, format='rgb', quality=100)

        # CMYK JPEG bytes -> RGB JPEG bytes.
        self._cmyk_data = tf.placeholder(dtype=tf.string)
        cmyk_pixels = tf.image.decode_jpeg(self._cmyk_data, channels=0)
        self._cmyk_to_rgb = tf.image.encode_jpeg(
            cmyk_pixels, format='rgb', quality=100)

        # RGB JPEG bytes -> decoded pixel array.
        self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
        self._decode_jpeg = tf.image.decode_jpeg(
            self._decode_jpeg_data, channels=3)

    def png_to_jpeg(self, image_data):
        """Run the PNG -> JPEG op on `image_data` and return the result."""
        feed = {self._png_data: image_data}
        return self._sess.run(self._png_to_jpeg, feed_dict=feed)

    def cmyk_to_rgb(self, image_data):
        """Run the CMYK -> RGB JPEG op on `image_data` and return the result."""
        feed = {self._cmyk_data: image_data}
        return self._sess.run(self._cmyk_to_rgb, feed_dict=feed)

    def decode_jpeg(self, image_data):
        """Decode JPEG bytes and check the result is a 3-channel, rank-3 array."""
        feed = {self._decode_jpeg_data: image_data}
        decoded = self._sess.run(self._decode_jpeg, feed_dict=feed)
        assert len(decoded.shape) == 3
        assert decoded.shape[2] == 3
        return decoded
|
||||
|
||||
|
||||
def _is_png(filename):
|
||||
"""Determine if a file contains a PNG format image.
|
||||
|
||||
Args:
|
||||
filename: string, path of the image file.
|
||||
|
||||
Returns:
|
||||
boolean indicating if the image is a PNG.
|
||||
"""
|
||||
# File list from:
|
||||
# https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU
|
||||
return 'n02105855_2933.JPEG' in filename
|
||||
|
||||
|
||||
def _is_cmyk(filename):
|
||||
"""Determine if file contains a CMYK JPEG format image.
|
||||
|
||||
Args:
|
||||
filename: string, path of the image file.
|
||||
|
||||
Returns:
|
||||
boolean indicating if the image is a JPEG encoded with CMYK color space.
|
||||
"""
|
||||
# File list from:
|
||||
# https://github.com/cytsai/ilsvrc-cmyk-image-list
|
||||
blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG',
|
||||
'n02447366_23489.JPEG', 'n02492035_15739.JPEG',
|
||||
'n02747177_10752.JPEG', 'n03018349_4028.JPEG',
|
||||
'n03062245_4620.JPEG', 'n03347037_9675.JPEG',
|
||||
'n03467068_12171.JPEG', 'n03529860_11437.JPEG',
|
||||
'n03544143_17228.JPEG', 'n03633091_5218.JPEG',
|
||||
'n03710637_5125.JPEG', 'n03961711_5286.JPEG',
|
||||
'n04033995_2932.JPEG', 'n04258138_17003.JPEG',
|
||||
'n04264628_27969.JPEG', 'n04336792_7448.JPEG',
|
||||
'n04371774_5854.JPEG', 'n04596742_4225.JPEG',
|
||||
'n07583066_647.JPEG', 'n13037406_4650.JPEG']
|
||||
return filename.split('/')[-1] in blacklist
|
||||
|
||||
|
||||
def _process_image(filename, coder):
    """Load one image file and return its JPEG bytes plus dimensions.

    Args:
      filename: string, path to an image file e.g., '/path/to/example.JPG'.
      coder: instance of ImageCoder to provide TensorFlow image coding utils.

    Returns:
      image_buffer: string, JPEG encoding of RGB image.
      height: integer, image height in pixels.
      width: integer, image width in pixels.
    """
    with tf.gfile.FastGFile(filename, 'rb') as image_file:
        encoded = image_file.read()

    # A handful of known files are not RGB JPEGs; re-encode those first.
    if _is_png(filename):
        print('Converting PNG to JPEG for %s' % filename)
        encoded = coder.png_to_jpeg(encoded)
    elif _is_cmyk(filename):
        print('Converting CMYK to RGB for %s' % filename)
        encoded = coder.cmyk_to_rgb(encoded)

    # Decoding is only needed to validate the image and read its size; the
    # (possibly re-encoded) JPEG bytes are what gets returned.
    pixels = coder.decode_jpeg(encoded)
    assert len(pixels.shape) == 3
    assert pixels.shape[2] == 3
    height, width = pixels.shape[0], pixels.shape[1]

    return encoded, height, width
|
||||
|
||||
|
||||
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               synsets, labels, humans, bboxes, num_shards):
    """Processes and saves list of images as TFRecord in 1 thread.

    Args:
      coder: instance of ImageCoder to provide TensorFlow image coding utils.
      thread_index: integer, unique batch to run index is within
        [0, len(ranges)).
      ranges: list of pairs of integers specifying ranges of each batches to
        analyze in parallel.
      name: string, unique identifier specifying the data set
      filenames: list of strings; each string is a path to an image file
      synsets: list of strings; each string is a unique WordNet ID
      labels: list of integer; each integer identifies the ground truth
      humans: list of strings; each string is a human-readable label
      bboxes: list of bounding boxes for each image. Currently unused: every
        example is written with the placeholder box [[0, 0, 1, 1]].
      num_shards: integer number of shards for this data set.
    """
    # Each thread produces N shards where N = num_shards // num_threads.
    # For instance, if num_shards = 128, and the num_threads = 2, then the first
    # thread would produce shards [0, 64).
    num_threads = len(ranges)
    assert not num_shards % num_threads
    num_shards_per_batch = num_shards // num_threads

    batch_start, batch_end = ranges[thread_index][0], ranges[thread_index][1]
    shard_ranges = np.linspace(batch_start, batch_end,
                               num_shards_per_batch + 1).astype(int)
    num_files_in_thread = batch_end - batch_start

    counter = 0
    for s in range(num_shards_per_batch):
        # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
        shard = thread_index * num_shards_per_batch + s
        output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
        output_file = os.path.join(FLAGS.output_directory, output_filename)

        shard_counter = 0
        files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1],
                                   dtype=int)

        writer = tf.python_io.TFRecordWriter(output_file)
        # Close the writer even when an image fails to process, so the shard
        # file handle is not leaked by a crashing worker thread.
        try:
            for i in files_in_shard:
                filename = filenames[i]
                label = labels[i]
                synset = synsets[i]
                human = humans[i]

                image_buffer, height, width = _process_image(filename, coder)

                example = _convert_to_example(filename, image_buffer, label,
                                              synset, human, [[0, 0, 1, 1]],
                                              height, width)
                writer.write(example.SerializeToString())
                shard_counter += 1
                counter += 1

                if not counter % 1000:
                    print('%s [thread %d]: Processed %d of %d images in thread batch.' %
                          (datetime.now(), thread_index, counter,
                           num_files_in_thread))
                    sys.stdout.flush()
        finally:
            writer.close()

        print('%s [thread %d]: Wrote %d images to %s' %
              (datetime.now(), thread_index, shard_counter, output_file))
        sys.stdout.flush()

    # Report the number of shards written by this thread (the message used to
    # print the file count in the "shards" slot).
    print('%s [thread %d]: Wrote %d images to %d shards.' %
          (datetime.now(), thread_index, counter, num_shards_per_batch))
    sys.stdout.flush()
|
||||
|
||||
|
||||
def _process_image_files(name, filenames, synsets, labels, humans,
                         bboxes, num_shards):
    """Process and save list of images as TFRecord of Example protos.

    The file list is split into FLAGS.num_threads contiguous ranges and one
    worker thread running _process_image_files_batch is started per range.

    Args:
      name: string, unique identifier specifying the data set
      filenames: list of strings; each string is a path to an image file
      synsets: list of strings; each string is a unique WordNet ID
      labels: list of integer; each integer identifies the ground truth
      humans: list of strings; each string is a human-readable label
      bboxes: list of bounding boxes for each image. Its length is not
        checked here; it is only forwarded to the worker threads.
      num_shards: integer number of shards for this data set.
    """
    assert len(filenames) == len(synsets)
    assert len(filenames) == len(labels)
    assert len(filenames) == len(humans)

    # Break all images into batches with a [ranges[i][0], ranges[i][1]].
    # Use the builtin int: the np.int alias was removed from NumPy.
    spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(int)
    ranges = [[start, end] for start, end in zip(spacing[:-1], spacing[1:])]

    # Launch a thread for each batch.
    print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))
    sys.stdout.flush()

    # Create a mechanism for monitoring when all threads are finished.
    coord = tf.train.Coordinator()

    # Create a generic TensorFlow-based utility for converting all image codings.
    coder = ImageCoder()

    threads = []
    for thread_index in range(len(ranges)):
        args = (coder, thread_index, ranges, name, filenames,
                synsets, labels, humans, bboxes, num_shards)
        t = threading.Thread(target=_process_image_files_batch, args=args)
        t.start()
        threads.append(t)

    # Wait for all the threads to terminate.
    coord.join(threads)
    print('%s: Finished writing all %d images in data set.' %
          (datetime.now(), len(filenames)))
    sys.stdout.flush()
|
||||
|
||||
|
||||
def _find_image_files(data_dir, labels_file):
    """Build a list of all images files and labels in the data set.

    Args:
      data_dir: string, path to the root directory of images.

        Assumes that the ImageNet data set resides in JPEG files located in
        the following directory structure.

          data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
          data_dir/n01440764/ILSVRC2012_val_00000543.JPEG

        where 'n01440764' is the unique synset label associated with these
        images.

      labels_file: string, path to the labels file.

        The list of valid labels are held in this file. Assumes that the file
        contains entries as such:
          n01440764
          n01443537
          n01484850
        where each line corresponds to a label expressed as a synset. Each
        synset is mapped to an integer equal to its 1-based line position in
        the file.

        The reason we start the integer labels at 1 is to reserve label 0 as an
        unused background class.

    Returns:
      filenames: list of strings; each string is a path to an image file.
      synsets: list of strings; each string is a unique WordNet ID.
      labels: list of integer; each integer identifies the ground truth.
    """
    print('Determining list of input files and labels from %s.' % data_dir)
    # Read the synset list inside a context manager so the handle is closed.
    with tf.gfile.FastGFile(labels_file, 'r') as labels_fh:
        challenge_synsets = [line.strip() for line in labels_fh.readlines()]

    labels = []
    filenames = []
    synsets = []

    # Construct the list of JPEG files and labels. Label index 0 is left empty
    # as a background class, hence the enumeration starts at 1.
    for label_index, synset in enumerate(challenge_synsets, start=1):
        jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset)
        matching_files = tf.gfile.Glob(jpeg_file_path)

        labels.extend([label_index] * len(matching_files))
        synsets.extend([synset] * len(matching_files))
        filenames.extend(matching_files)

        if not label_index % 100:
            print('Finished finding files in %d of %d classes.' % (
                label_index, len(challenge_synsets)))

    # Shuffle the ordering of all image files in order to guarantee
    # random ordering of the images with respect to label in the
    # saved TFRecord files. Make the randomization repeatable.
    shuffled_index = list(range(len(filenames)))
    random.seed(12345)
    random.shuffle(shuffled_index)

    filenames = [filenames[i] for i in shuffled_index]
    synsets = [synsets[i] for i in shuffled_index]
    labels = [labels[i] for i in shuffled_index]

    print('Found %d JPEG files across %d labels inside %s.' %
          (len(filenames), len(challenge_synsets), data_dir))
    return filenames, synsets, labels
|
||||
|
||||
|
||||
def _find_human_readable_labels(synsets, synset_to_human):
|
||||
"""Build a list of human-readable labels.
|
||||
|
||||
Args:
|
||||
synsets: list of strings; each string is a unique WordNet ID.
|
||||
synset_to_human: dict of synset to human labels, e.g.,
|
||||
'n02119022' --> 'red fox, Vulpes vulpes'
|
||||
|
||||
Returns:
|
||||
List of human-readable strings corresponding to each synset.
|
||||
"""
|
||||
humans = []
|
||||
for s in synsets:
|
||||
assert s in synset_to_human, ('Failed to find: %s' % s)
|
||||
humans.append(synset_to_human[s])
|
||||
return humans
|
||||
|
||||
|
||||
def _process_dataset(name, directory, num_shards, synset_to_human,
                     image_to_bboxes):
    """Convert one complete data set into sharded TFRecord files.

    Args:
      name: string, unique identifier specifying the data set.
      directory: string, root path to the data set.
      num_shards: integer number of shards for this data set.
      synset_to_human: dict of synset to human labels, e.g.,
        'n02119022' --> 'red fox, Vulpes vulpes'
      image_to_bboxes: dictionary mapping image file names to a list of
        bounding boxes. Not consulted here: the bounding-box lookup is
        disabled and an empty list is forwarded instead.
    """
    image_paths, wnids, class_ids = _find_image_files(directory,
                                                      FLAGS.labels_file)
    readable_labels = _find_human_readable_labels(wnids, synset_to_human)
    no_bboxes = []
    _process_image_files(name, image_paths, wnids, class_ids,
                         readable_labels, no_bboxes, num_shards)
|
||||
|
||||
|
||||
def _build_synset_lookup(imagenet_metadata_file):
    """Build lookup for synset to human-readable label.

    Args:
      imagenet_metadata_file: string, path to file containing mapping from
        synset to human-readable label.

        Assumes each line of the file looks like:

          n02119247    black fox
          n02119359    silver fox
          n02119477    red fox, Vulpes fulva

        where each line corresponds to a unique mapping. Note that each line is
        formatted as <synset>\t<human readable label>. Blank lines are skipped.

    Returns:
      Dictionary of synset to human labels, such as:
        'n02119022' --> 'red fox, Vulpes vulpes'
    """
    synset_to_human = {}
    # Read inside a context manager so the file handle is always released.
    with tf.gfile.FastGFile(imagenet_metadata_file, 'r') as metadata_fh:
        for raw_line in metadata_fh.readlines():
            # Lines from readlines() keep their newline and are never falsy,
            # so strip first and only then test for emptiness.
            line = raw_line.strip()
            if not line:
                continue
            parts = line.split('\t')
            assert len(parts) == 2, 'Malformed metadata line: %r' % raw_line
            synset, human = parts
            synset_to_human[synset] = human
    return synset_to_human
|
||||
|
||||
|
||||
def main(unused_argv):
    """Validate the shard/thread flags, then convert validation and train sets.

    Args:
      unused_argv: ignored.
    """
    # Both shard counts must be a multiple of the number of threads.
    assert FLAGS.train_shards % FLAGS.num_threads == 0, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')
    assert FLAGS.validation_shards % FLAGS.num_threads == 0, (
        'Please make the FLAGS.num_threads commensurate with '
        'FLAGS.validation_shards')
    print('Saving results to %s' % FLAGS.output_directory)

    # Mapping from synset ID to human-readable label, shared by both data sets.
    lookup = _build_synset_lookup(FLAGS.imagenet_metadata_file)

    # The validation set is converted first, then the training set.
    _process_dataset(name='validation',
                     directory=FLAGS.validation_directory,
                     num_shards=FLAGS.validation_shards,
                     synset_to_human=lookup,
                     image_to_bboxes=None)
    _process_dataset(name='train',
                     directory=FLAGS.train_directory,
                     num_shards=FLAGS.train_shards,
                     synset_to_human=lookup,
                     image_to_bboxes=None)
|
||||
|
||||
|
||||
# Script entry point: hand control to TensorFlow's app runner, which calls main().
if __name__ == '__main__':
    tf.app.run(main=main)
|
21842
TensorFlow/Classification/ConvNets/dataprep/imagenet_metadata.txt
Normal file
|
@ -0,0 +1,10 @@
|
|||
n02086240
|
||||
n02087394
|
||||
n02088364
|
||||
n02089973
|
||||
n02093754
|
||||
n02096294
|
||||
n02099601
|
||||
n02105641
|
||||
n02111889
|
||||
n02115641
|
82
TensorFlow/Classification/ConvNets/dataprep/preprocess_imagenet.sh
Executable file
|
@ -0,0 +1,82 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
# Script to download and preprocess ImageNet Challenge 2012
|
||||
# training and validation data set.
|
||||
#
|
||||
# The final output of this script are sharded TFRecord files containing
|
||||
# serialized Example protocol buffers. See build_imagenet_data.py for
|
||||
# details of how the Example protocol buffers contain the ImageNet data.
|
||||
#
|
||||
# The final output of this script appears as such:
|
||||
#
|
||||
# data_dir/train-00000-of-01024
|
||||
# data_dir/train-00001-of-01024
|
||||
# ...
|
||||
# data_dir/train-01023-of-01024
|
||||
#
|
||||
# and
|
||||
#
|
||||
# data_dir/validation-00000-of-00128
|
||||
# data_dir/validation-00001-of-00128
|
||||
# ...
|
||||
# data_dir/validation-00127-of-00128
|
||||
#
|
||||
# Note that this script may take several hours to run to completion. The
|
||||
# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending
|
||||
# on the speed of your machine. Please be patient.
|
||||
#
|
||||
# **IMPORTANT**
|
||||
# To download the raw images, the user must create an account with image-net.org
|
||||
# and generate a username and access_key. The latter two are required for
|
||||
# downloading the raw images.
|
||||
#
|
||||
# usage:
|
||||
# ./preprocess_imagenet.sh [data-dir]
|
||||
set -e

if [ -z "$1" ]; then
  # A missing argument is a usage error: report on stderr and exit non-zero.
  echo "Usage: preprocess_imagenet.sh [data dir]" >&2
  exit 1
fi

# Strip a single trailing slash so the paths built below are well-formed.
DATA_DIR="${1%/}"
SCRATCH_DIR="${DATA_DIR}/raw-data/"
mkdir -p "${SCRATCH_DIR}"

# Convert the XML files for bounding box annotations into a single CSV.
echo "Extracting bounding box information from XML."
BOUNDING_BOX_SCRIPT="./dataprep/process_bounding_boxes.py"
BOUNDING_BOX_FILE="${DATA_DIR}/imagenet_2012_bounding_boxes.csv"
BOUNDING_BOX_DIR="${DATA_DIR}/bounding_boxes/"

LABELS_FILE="./dataprep/imagenet_lsvrc_2015_synsets.txt"

"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \
  | sort > "${BOUNDING_BOX_FILE}"
echo "preprocessing the ImageNet data."

# Build the TFRecords version of the ImageNet data.
# The records are written to ${DATA_DIR}/result; create it up front so the
# conversion does not depend on the directory already existing.
OUTPUT_DIRECTORY="${DATA_DIR}/result"
mkdir -p "${OUTPUT_DIRECTORY}"
IMAGENET_METADATA_FILE="./dataprep/imagenet_metadata.txt"

python ./dataprep/build_imagenet_data.py \
  --train_directory="${DATA_DIR}/train" \
  --validation_directory="${DATA_DIR}/val" \
  --output_directory="${OUTPUT_DIRECTORY}" \
  --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \
  --labels_file="${LABELS_FILE}" \
  --bounding_box_file="${BOUNDING_BOX_FILE}"
|
|
@ -0,0 +1,89 @@
|
|||
#!/usr/bin/python
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Sort the ImageNet 2012 Challenge validation images into per-synset directories.
|
||||
|
||||
Associate the ImageNet 2012 Challenge validation data set with labels.
|
||||
|
||||
The raw ImageNet validation data set is expected to reside in JPEG files
|
||||
located in the following directory structure.
|
||||
|
||||
data_dir/ILSVRC2012_val_00000001.JPEG
|
||||
data_dir/ILSVRC2012_val_00000002.JPEG
|
||||
...
|
||||
data_dir/ILSVRC2012_val_00050000.JPEG
|
||||
|
||||
This script moves the files into a directory structure like such:
|
||||
data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
|
||||
data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
|
||||
...
|
||||
where 'n01440764' is the unique synset label associated with
|
||||
these images.
|
||||
|
||||
This directory reorganization requires a mapping from validation image
|
||||
number (i.e. suffix of the original file) to the associated label. This
|
||||
is provided in the ImageNet development kit via a Matlab file.
|
||||
|
||||
In order to make life easier and divorce ourselves from Matlab, we instead
|
||||
supply a custom text file that provides this mapping for us.
|
||||
|
||||
Sample usage:
|
||||
./preprocess_imagenet_validation_data.py ILSVRC2012_img_val \
|
||||
imagenet_2012_validation_synset_labels.txt
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import errno
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
|
||||
# Script body: move every validation image into the sub-directory named after
# its synset label, creating those sub-directories as needed.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('Invalid usage\n'
              'usage: preprocess_imagenet_validation_data.py '
              '<validation data dir> <validation labels file>')
        sys.exit(-1)
    data_dir = sys.argv[1]
    validation_labels_file = sys.argv[2]

    # Read in the 50000 synsets associated with the validation data set.
    # The context manager closes the labels file once it has been read.
    with open(validation_labels_file) as labels_fh:
        labels = [line.strip() for line in labels_fh]
    unique_labels = set(labels)

    # Make all sub-directories in the validation data dir.
    for label in unique_labels:
        labeled_data_dir = os.path.join(data_dir, label)
        # Catch error if sub-directory exists
        try:
            os.makedirs(labeled_data_dir)
        except OSError as e:
            # Raise all errors but 'EEXIST'
            if e.errno != errno.EEXIST:
                raise

    # Move all of the image to the appropriate sub-directory.
    # Line k of the labels file (1-based) labels image number k.
    for image_number, label in enumerate(labels, start=1):
        basename = 'ILSVRC2012_val_000%.5d.JPEG' % image_number
        original_filename = os.path.join(data_dir, basename)
        if not os.path.exists(original_filename):
            print('Failed to find: %s' % original_filename)
            sys.exit(-1)
        new_filename = os.path.join(data_dir, label, basename)
        os.rename(original_filename, new_filename)
|
254
TensorFlow/Classification/ConvNets/dataprep/process_bounding_boxes.py
Executable file
|
@ -0,0 +1,254 @@
|
|||
#!/usr/bin/python
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Process the ImageNet Challenge bounding boxes for TensorFlow model training.
|
||||
|
||||
This script is called as
|
||||
|
||||
process_bounding_boxes.py <dir> [synsets-file]
|
||||
|
||||
Where <dir> is a directory containing the downloaded and unpacked bounding box
|
||||
data. If [synsets-file] is supplied, then only the bounding boxes whose
|
||||
synsets are contained within this file are returned. Note that the
|
||||
[synsets-file] file contains synset ids, one per line.
|
||||
|
||||
The script dumps out a CSV text file in which each line contains an entry.
|
||||
n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940
|
||||
|
||||
The entry can be read as:
|
||||
<JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax>
|
||||
|
||||
The bounding box for <JPEG file name> contains two points (xmin, ymin) and
|
||||
(xmax, ymax) specifying the lower-left corner and upper-right corner of a
|
||||
bounding box in *relative* coordinates.
|
||||
|
||||
The user supplies a directory where the XML files reside. The directory
|
||||
structure in the directory <dir> is assumed to look like this:
|
||||
|
||||
<dir>/nXXXXXXXX/nXXXXXXXX_YYYY.xml
|
||||
|
||||
Each XML file contains a bounding box annotation. The script:
|
||||
|
||||
(1) Parses the XML file and extracts the filename, label and bounding box info.
|
||||
|
||||
(2) The bounding box is specified in the XML files as integer (xmin, ymin) and
|
||||
(xmax, ymax) *relative* to image size displayed to the human annotator. The
|
||||
size of the image displayed to the human annotator is stored in the XML file
|
||||
as integer (height, width).
|
||||
|
||||
Note that the displayed size will differ from the actual size of the image
|
||||
downloaded from image-net.org. To make the bounding box annotation useable,
|
||||
we convert bounding box to floating point numbers relative to displayed
|
||||
height and width of the image.
|
||||
|
||||
Note that each XML file might contain N bounding box annotations.
|
||||
|
||||
Note that the points are all clamped at a range of [0.0, 1.0] because some
|
||||
human annotations extend outside the range of the supplied image.
|
||||
|
||||
See details here: http://image-net.org/download-bboxes
|
||||
|
||||
(3) By default, the script outputs all valid bounding boxes. If a
|
||||
[synsets-file] is supplied, only the subset of bounding boxes associated
|
||||
with those synsets are outputted. Importantly, one can supply a list of
|
||||
synsets in the ImageNet Challenge and output the list of bounding boxes
|
||||
associated with the training images of the ILSVRC.
|
||||
|
||||
We use these bounding boxes to inform the random distortion of images
|
||||
supplied to the network.
|
||||
|
||||
If you run this script successfully, you will see the following output
|
||||
to stderr:
|
||||
> Finished processing 544546 XML files.
|
||||
> Skipped 0 XML files not in ImageNet Challenge.
|
||||
> Skipped 0 bounding boxes not in ImageNet Challenge.
|
||||
> Wrote 615299 bounding boxes from 544546 annotated images.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import glob
|
||||
import os.path
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
|
||||
class BoundingBox(object):
    """Plain attribute container; fields are assigned by ProcessXMLAnnotation."""
|
||||
|
||||
|
||||
def GetItem(name, root, index=0):
    """Return the text of the index-th element tagged `name` beneath `root`.

    Args:
      name: tag to search for.
      root: element whose `iter(name)` results are scanned in order.
      index: zero-based occurrence to return.

    Returns:
      The `.text` of the matching element, or the integer -1 when fewer than
      index + 1 elements carry that tag.
    """
    for position, element in enumerate(root.iter(name)):
        if position == index:
            return element.text
    # Failed to find "index" occurrence of item.
    return -1
|
||||
|
||||
|
||||
def GetInt(name, root, index=0):
    """Return the index-th `name` item under `root` as an integer.

    The value goes through float() first because in some XML annotation files
    the point values are floats rather than integers, and int() on such text
    would raise ValueError.
    """
    raw_value = GetItem(name, root, index)
    return int(float(raw_value))
|
||||
|
||||
|
||||
def FindNumberBoundingBoxes(root):
    """Count the bounding boxes under `root` by counting its 'xmin' items.

    GetItem returns the integer -1 only when the requested occurrence does not
    exist; existing items come back as text. Testing the raw GetItem result,
    rather than the parsed integer, keeps an annotation whose xmin really is
    -1 from being mistaken for the end of the list.

    Args:
      root: root element of a parsed annotation file.

    Returns:
      Integer number of 'xmin' items found.
    """
    index = 0
    while GetItem('xmin', root, index) != -1:
        index += 1
    return index
|
||||
|
||||
|
||||
def ProcessXMLAnnotation(xml_file):
    """Parse one XML annotation file into a list of BoundingBox objects.

    Args:
      xml_file: path of the XML annotation file.

    Returns:
      A list with one BoundingBox per annotation found in the file, or None
      when the file cannot be parsed (a message is printed to stderr).
    """
    # pylint: disable=broad-except
    try:
        tree = ET.parse(xml_file)
    except Exception:
        print('Failed to parse: ' + xml_file, file=sys.stderr)
        return None
    # pylint: enable=broad-except
    root = tree.getroot()

    def _clamp_unit(value):
        # Restrict a relative coordinate to the [0.0, 1.0] interval.
        return min(max(value, 0.0), 1.0)

    boxes = []
    for index in range(FindNumberBoundingBoxes(root)):
        box = BoundingBox()
        # Grab the 'index' annotation.
        box.xmin = GetInt('xmin', root, index)
        box.ymin = GetInt('ymin', root, index)
        box.xmax = GetInt('xmax', root, index)
        box.ymax = GetInt('ymax', root, index)

        box.width = GetInt('width', root)
        box.height = GetInt('height', root)
        box.filename = GetItem('filename', root) + '.JPEG'
        box.label = GetItem('name', root)

        # Express the corners relative to the annotated width and height.
        width = float(box.width)
        height = float(box.height)
        rel_x = (float(box.xmin) / width, float(box.xmax) / width)
        rel_y = (float(box.ymin) / height, float(box.ymax) / height)

        # Some images contain bounding box annotations that
        # extend outside of the supplied image. See, e.g.
        # n03127925/n03127925_147.xml
        # Additionally, for some bounding boxes, the min > max
        # or the box is entirely outside of the image.
        box.xmin_scaled = _clamp_unit(min(rel_x))
        box.xmax_scaled = _clamp_unit(max(rel_x))
        box.ymin_scaled = _clamp_unit(min(rel_y))
        box.ymax_scaled = _clamp_unit(max(rel_y))

        boxes.append(box)

    return boxes
|
||||
|
||||
# Script body: walk every <dir>/<synset>/*.xml annotation, print one CSV row
# per usable bounding box to stdout, and report progress and totals on stderr.
if __name__ == '__main__':
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print('Invalid usage\n'
              'usage: process_bounding_boxes.py <dir> [synsets-file]',
              file=sys.stderr)
        sys.exit(-1)

    xml_files = glob.glob(sys.argv[1] + '/*/*.xml')
    print('Identified %d XML files in %s' % (len(xml_files), sys.argv[1]),
          file=sys.stderr)

    if len(sys.argv) == 3:
        # Read the optional synset filter; the context manager closes the file.
        with open(sys.argv[2]) as synsets_fh:
            labels = {line.strip() for line in synsets_fh}
        print('Identified %d synset IDs in %s' % (len(labels), sys.argv[2]),
              file=sys.stderr)
    else:
        labels = None

    skipped_boxes = 0
    skipped_files = 0
    saved_boxes = 0
    saved_files = 0
    for file_index, one_file in enumerate(xml_files):
        # Example: <...>/n06470073/n00141669_6790.xml
        label = os.path.basename(os.path.dirname(one_file))

        # Determine if the annotation is from an ImageNet Challenge label.
        if labels is not None and label not in labels:
            skipped_files += 1
            continue

        bboxes = ProcessXMLAnnotation(one_file)
        assert bboxes is not None, 'No bounding boxes found in ' + one_file

        # Note bbox.filename occasionally contains '%s' in the name. This is
        # data set noise that is fixed by just using the basename of the XML
        # file. The name is the same for every box in the file, so compute it
        # once per file.
        image_filename = os.path.splitext(os.path.basename(one_file))[0]

        found_box = False
        for bbox in bboxes:
            # Note: There is a slight bug in the bounding box annotation data.
            # Many of the dog labels have the human label 'Scottish_deerhound'
            # instead of the synset ID 'n02092002' in the bbox.label field. As a
            # simple hack to overcome this issue, we only exclude bbox labels
            # *which are synset ID's* that do not match original synset label
            # for the XML file.
            if (labels is not None and bbox.label != label
                    and bbox.label in labels):
                skipped_boxes += 1
                continue

            # Guard against improperly specified boxes.
            if (bbox.xmin_scaled >= bbox.xmax_scaled or
                    bbox.ymin_scaled >= bbox.ymax_scaled):
                skipped_boxes += 1
                continue

            print('%s.JPEG,%.4f,%.4f,%.4f,%.4f' %
                  (image_filename,
                   bbox.xmin_scaled, bbox.ymin_scaled,
                   bbox.xmax_scaled, bbox.ymax_scaled))

            saved_boxes += 1
            found_box = True

        if found_box:
            saved_files += 1
        else:
            skipped_files += 1

        if not file_index % 5000:
            print('--> processed %d of %d XML files.' %
                  (file_index + 1, len(xml_files)),
                  file=sys.stderr)
            print('--> skipped %d boxes and %d XML files.' %
                  (skipped_boxes, skipped_files), file=sys.stderr)

    print('Finished processing %d XML files.' % len(xml_files), file=sys.stderr)
    print('Skipped %d XML files not in ImageNet Challenge.' % skipped_files,
          file=sys.stderr)
    print('Skipped %d bounding boxes not in ImageNet Challenge.' % skipped_boxes,
          file=sys.stderr)
    print('Wrote %d bounding boxes from %d annotated images.' %
          (saved_boxes, saved_files),
          file=sys.stderr)
    print('Finished.', file=sys.stderr)
|
|
@ -42,12 +42,10 @@ if __name__ == "__main__":
|
|||
log_path = os.path.join(FLAGS.results_dir, FLAGS.log_filename)
|
||||
os.makedirs(FLAGS.results_dir, exist_ok=True)
|
||||
|
||||
dllogger.init(
|
||||
backends=[
|
||||
dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=log_path),
|
||||
dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)
|
||||
]
|
||||
)
|
||||
dllogger.init(backends=[
|
||||
dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=log_path),
|
||||
dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)
|
||||
])
|
||||
else:
|
||||
dllogger.init(backends=[])
|
||||
dllogger.log(data=vars(FLAGS), step='PARAMETER')
|
||||
|
@ -58,49 +56,46 @@ if __name__ == "__main__":
|
|||
architecture=FLAGS.arch,
|
||||
input_format='NHWC',
|
||||
compute_format=FLAGS.data_format,
|
||||
dtype=tf.float32 if FLAGS.precision == 'fp32' else tf.float16,
|
||||
dtype=tf.float32,
|
||||
n_channels=3,
|
||||
height=224,
|
||||
width=224,
|
||||
height=224 if FLAGS.data_dir else FLAGS.synthetic_data_size,
|
||||
width=224 if FLAGS.data_dir else FLAGS.synthetic_data_size,
|
||||
distort_colors=False,
|
||||
log_dir=FLAGS.results_dir,
|
||||
model_dir=FLAGS.model_dir if FLAGS.model_dir is not None else FLAGS.results_dir,
|
||||
data_dir=FLAGS.data_dir,
|
||||
data_idx_dir=FLAGS.data_idx_dir,
|
||||
weight_init=FLAGS.weight_init,
|
||||
use_xla=FLAGS.use_xla,
|
||||
use_tf_amp=FLAGS.use_tf_amp,
|
||||
use_dali=FLAGS.use_dali,
|
||||
use_xla=FLAGS.xla,
|
||||
use_tf_amp=FLAGS.amp,
|
||||
use_dali=FLAGS.dali,
|
||||
gpu_memory_fraction=FLAGS.gpu_memory_fraction,
|
||||
gpu_id=FLAGS.gpu_id,
|
||||
seed=FLAGS.seed
|
||||
)
|
||||
seed=FLAGS.seed)
|
||||
|
||||
if FLAGS.mode in ["train", "train_and_evaluate", "training_benchmark"]:
|
||||
runner.train(
|
||||
iter_unit=FLAGS.iter_unit,
|
||||
num_iter=FLAGS.num_iter,
|
||||
run_iter=FLAGS.run_iter,
|
||||
batch_size=FLAGS.batch_size,
|
||||
warmup_steps=FLAGS.warmup_steps,
|
||||
log_every_n_steps=FLAGS.display_every,
|
||||
weight_decay=FLAGS.weight_decay,
|
||||
lr_init=FLAGS.lr_init,
|
||||
lr_warmup_epochs=FLAGS.lr_warmup_epochs,
|
||||
momentum=FLAGS.momentum,
|
||||
loss_scale=FLAGS.loss_scale,
|
||||
label_smoothing=FLAGS.label_smoothing,
|
||||
mixup=FLAGS.mixup,
|
||||
use_static_loss_scaling=FLAGS.use_static_loss_scaling,
|
||||
use_cosine_lr=FLAGS.use_cosine_lr,
|
||||
is_benchmark=FLAGS.mode == 'training_benchmark',
|
||||
use_final_conv=FLAGS.use_final_conv,
|
||||
quantize=FLAGS.quantize,
|
||||
symmetric=FLAGS.symmetric,
|
||||
quant_delay = FLAGS.quant_delay,
|
||||
use_qdq = FLAGS.use_qdq,
|
||||
finetune_checkpoint=FLAGS.finetune_checkpoint,
|
||||
)
|
||||
runner.train(iter_unit=FLAGS.iter_unit,
|
||||
num_iter=FLAGS.num_iter,
|
||||
run_iter=FLAGS.run_iter,
|
||||
batch_size=FLAGS.batch_size,
|
||||
warmup_steps=FLAGS.warmup_steps,
|
||||
log_every_n_steps=FLAGS.display_every,
|
||||
weight_decay=FLAGS.weight_decay,
|
||||
lr_init=FLAGS.lr_init,
|
||||
lr_warmup_epochs=FLAGS.lr_warmup_epochs,
|
||||
momentum=FLAGS.momentum,
|
||||
loss_scale=FLAGS.static_loss_scale,
|
||||
label_smoothing=FLAGS.label_smoothing,
|
||||
mixup=FLAGS.mixup,
|
||||
use_static_loss_scaling=(FLAGS.static_loss_scale != -1),
|
||||
use_cosine_lr=FLAGS.cosine_lr,
|
||||
is_benchmark=FLAGS.mode == 'training_benchmark',
|
||||
use_final_conv=FLAGS.use_final_conv,
|
||||
quantize=FLAGS.quantize,
|
||||
symmetric=FLAGS.symmetric,
|
||||
quant_delay=FLAGS.quant_delay,
|
||||
use_qdq=FLAGS.use_qdq,
|
||||
finetune_checkpoint=FLAGS.finetune_checkpoint)
|
||||
|
||||
if FLAGS.mode in ["train_and_evaluate", 'evaluate', 'inference_benchmark']:
|
||||
|
||||
|
@ -109,19 +104,17 @@ if __name__ == "__main__":
|
|||
|
||||
elif not hvd_utils.is_using_hvd() or hvd.rank() == 0:
|
||||
|
||||
runner.evaluate(
|
||||
iter_unit=FLAGS.iter_unit if FLAGS.mode != "train_and_evaluate" else "epoch",
|
||||
num_iter=FLAGS.num_iter if FLAGS.mode != "train_and_evaluate" else 1,
|
||||
warmup_steps=FLAGS.warmup_steps,
|
||||
batch_size=FLAGS.batch_size,
|
||||
log_every_n_steps=FLAGS.display_every,
|
||||
is_benchmark=FLAGS.mode == 'inference_benchmark',
|
||||
export_dir=FLAGS.export_dir,
|
||||
quantize=FLAGS.quantize,
|
||||
symmetric=FLAGS.symmetric,
|
||||
use_final_conv=FLAGS.use_final_conv,
|
||||
use_qdq=FLAGS.use_qdq
|
||||
)
|
||||
runner.evaluate(iter_unit=FLAGS.iter_unit if FLAGS.mode != "train_and_evaluate" else "epoch",
|
||||
num_iter=FLAGS.num_iter if FLAGS.mode != "train_and_evaluate" else 1,
|
||||
warmup_steps=FLAGS.warmup_steps,
|
||||
batch_size=FLAGS.batch_size,
|
||||
log_every_n_steps=FLAGS.display_every,
|
||||
is_benchmark=FLAGS.mode == 'inference_benchmark',
|
||||
export_dir=FLAGS.export_dir,
|
||||
quantize=FLAGS.quantize,
|
||||
symmetric=FLAGS.symmetric,
|
||||
use_final_conv=FLAGS.use_final_conv,
|
||||
use_qdq=FLAGS.use_qdq)
|
||||
|
||||
if FLAGS.mode == 'predict':
|
||||
if FLAGS.to_predict is None:
|
||||
|
@ -134,4 +127,8 @@ if __name__ == "__main__":
|
|||
raise NotImplementedError("Only single GPU inference is implemented.")
|
||||
|
||||
elif not hvd_utils.is_using_hvd() or hvd.rank() == 0:
|
||||
runner.predict(FLAGS.to_predict, quantize=FLAGS.quantize, symmetric=FLAGS.symmetric, use_qdq=FLAGS.use_qdq, use_final_conv=FLAGS.use_final_conv)
|
||||
runner.predict(FLAGS.to_predict,
|
||||
quantize=FLAGS.quantize,
|
||||
symmetric=FLAGS.symmetric,
|
||||
use_qdq=FLAGS.use_qdq,
|
||||
use_final_conv=FLAGS.use_final_conv)
|
||||
|
|
|
@ -29,7 +29,7 @@ def conv2d(
|
|||
data_format='NHWC',
|
||||
dilation_rate=(1, 1),
|
||||
use_bias=True,
|
||||
kernel_initializer=tf.variance_scaling_initializer(),
|
||||
kernel_initializer=tf.compat.v1.variance_scaling_initializer(),
|
||||
bias_initializer=tf.zeros_initializer(),
|
||||
trainable=True,
|
||||
name=None
|
||||
|
@ -56,6 +56,5 @@ def conv2d(
|
|||
activation=None,
|
||||
name=name
|
||||
)
|
||||
|
||||
return net
|
||||
|
||||
return net
|
||||
|
|
|
@ -22,7 +22,7 @@ def dense(
|
|||
units,
|
||||
use_bias=True,
|
||||
trainable=True,
|
||||
kernel_initializer=tf.variance_scaling_initializer(),
|
||||
kernel_initializer=tf.compat.v1.variance_scaling_initializer(),
|
||||
bias_initializer=tf.zeros_initializer()
|
||||
):
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ def squeeze_excitation_layer(
|
|||
ratio,
|
||||
training=True,
|
||||
data_format='NCHW',
|
||||
kernel_initializer=tf.variance_scaling_initializer(),
|
||||
kernel_initializer=tf.compat.v1.variance_scaling_initializer(),
|
||||
bias_initializer=tf.zeros_initializer(),
|
||||
name="squeeze_excitation_layer"
|
||||
):
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
@ -34,7 +33,6 @@ from utils.data_utils import normalized_inputs
|
|||
from utils.learning_rate import learning_rate_scheduler
|
||||
from utils.optimizers import FixedLossScalerOptimizer
|
||||
|
||||
|
||||
__all__ = [
|
||||
'ResnetModel',
|
||||
]
|
||||
|
@ -89,14 +87,14 @@ class ResnetModel(object):
|
|||
)
|
||||
|
||||
self.conv2d_hparams = tf.contrib.training.HParams(
|
||||
kernel_initializer=tf.variance_scaling_initializer(
|
||||
kernel_initializer=tf.compat.v1.variance_scaling_initializer(
|
||||
scale=2.0, distribution='truncated_normal', mode=weight_init
|
||||
),
|
||||
bias_initializer=tf.constant_initializer(0.0)
|
||||
)
|
||||
|
||||
self.dense_hparams = tf.contrib.training.HParams(
|
||||
kernel_initializer=tf.variance_scaling_initializer(
|
||||
kernel_initializer=tf.compat.v1.variance_scaling_initializer(
|
||||
scale=2.0, distribution='truncated_normal', mode=weight_init
|
||||
),
|
||||
bias_initializer=tf.constant_initializer(0.0)
|
||||
|
@ -109,12 +107,13 @@ class ResnetModel(object):
|
|||
print("Input_format", input_format)
|
||||
print("dtype", str(dtype))
|
||||
|
||||
|
||||
def __call__(self, features, labels, mode, params):
|
||||
|
||||
if mode == tf.estimator.ModeKeys.TRAIN:
|
||||
mandatory_params = ["batch_size", "lr_init", "num_gpus", "steps_per_epoch",
|
||||
"momentum", "weight_decay", "loss_scale", "label_smoothing"]
|
||||
mandatory_params = [
|
||||
"batch_size", "lr_init", "num_gpus", "steps_per_epoch", "momentum", "weight_decay", "loss_scale",
|
||||
"label_smoothing"
|
||||
]
|
||||
for p in mandatory_params:
|
||||
if p not in params:
|
||||
raise RuntimeError("Parameter {} is missing.".format(p))
|
||||
|
@ -141,43 +140,46 @@ class ResnetModel(object):
|
|||
|
||||
mixup = 0
|
||||
eta = 0
|
||||
|
||||
if mode == tf.estimator.ModeKeys.TRAIN:
|
||||
|
||||
if mode == tf.estimator.ModeKeys.TRAIN:
|
||||
eta = params['label_smoothing']
|
||||
mixup = params['mixup']
|
||||
|
||||
if mode != tf.estimator.ModeKeys.PREDICT:
|
||||
one_hot_smoothed_labels = tf.one_hot(labels, 1001,
|
||||
on_value = 1 - eta + eta/1001,
|
||||
off_value = eta/1001)
|
||||
|
||||
if mode != tf.estimator.ModeKeys.PREDICT:
|
||||
n_cls = self.model_hparams.n_classes
|
||||
one_hot_smoothed_labels = tf.one_hot(labels, n_cls,
|
||||
on_value=1 - eta + eta / n_cls, off_value=eta / n_cls)
|
||||
if mixup != 0:
|
||||
|
||||
print("Using mixup training with beta=", params['mixup'])
|
||||
beta_distribution = tf.distributions.Beta(params['mixup'], params['mixup'])
|
||||
|
||||
feature_coefficients = beta_distribution.sample(sample_shape=[params['batch_size'], 1, 1, 1])
|
||||
feature_coefficients = beta_distribution.sample(sample_shape=[params['batch_size'], 1, 1, 1])
|
||||
|
||||
reversed_feature_coefficients = tf.subtract(tf.ones(shape=feature_coefficients.shape), feature_coefficients)
|
||||
reversed_feature_coefficients = tf.subtract(
|
||||
tf.ones(shape=feature_coefficients.shape), feature_coefficients
|
||||
)
|
||||
|
||||
rotated_features = tf.reverse(features, axis=[0])
|
||||
rotated_features = tf.reverse(features, axis=[0])
|
||||
|
||||
features = feature_coefficients * features + reversed_feature_coefficients * rotated_features
|
||||
|
||||
label_coefficients = tf.squeeze(feature_coefficients, axis=[2, 3])
|
||||
|
||||
rotated_labels = tf.reverse(one_hot_smoothed_labels, axis=[0])
|
||||
rotated_labels = tf.reverse(one_hot_smoothed_labels, axis=[0])
|
||||
|
||||
reversed_label_coefficients = tf.subtract(tf.ones(shape=label_coefficients.shape), label_coefficients)
|
||||
reversed_label_coefficients = tf.subtract(
|
||||
tf.ones(shape=label_coefficients.shape), label_coefficients
|
||||
)
|
||||
|
||||
one_hot_smoothed_labels = label_coefficients * one_hot_smoothed_labels + reversed_label_coefficients * rotated_labels
|
||||
|
||||
|
||||
|
||||
# Update Global Step
|
||||
global_step = tf.train.get_or_create_global_step()
|
||||
tf.identity(global_step, name="global_step_ref")
|
||||
|
||||
tf.identity(features, name="features_ref")
|
||||
|
||||
|
||||
if mode == tf.estimator.ModeKeys.TRAIN:
|
||||
tf.identity(labels, name="labels_ref")
|
||||
|
||||
|
@ -202,16 +204,31 @@ class ResnetModel(object):
|
|||
tf.identity(probs, name="probs_ref")
|
||||
tf.identity(y_preds, name="y_preds_ref")
|
||||
|
||||
#if mode == tf.estimator.ModeKeys.TRAIN:
|
||||
#
|
||||
# assert (len(tf.trainable_variables()) == 161)
|
||||
#
|
||||
#else:
|
||||
#
|
||||
# assert (len(tf.trainable_variables()) == 0)
|
||||
|
||||
if mode == tf.estimator.ModeKeys.TRAIN and params['quantize']:
|
||||
dllogger.log(data={"QUANTIZATION AWARE TRAINING ENABLED": True}, step=tuple())
|
||||
if params['symmetric']:
|
||||
dllogger.log(data={"MODE":"USING SYMMETRIC MODE"}, step=tuple())
|
||||
tf.contrib.quantize.experimental_create_training_graph(tf.get_default_graph(), symmetric=True, use_qdq=params['use_qdq'] ,quant_delay=params['quant_delay'])
|
||||
dllogger.log(data={"MODE": "USING SYMMETRIC MODE"}, step=tuple())
|
||||
tf.contrib.quantize.experimental_create_training_graph(
|
||||
tf.get_default_graph(),
|
||||
symmetric=True,
|
||||
use_qdq=params['use_qdq'],
|
||||
quant_delay=params['quant_delay']
|
||||
)
|
||||
else:
|
||||
dllogger.log(data={"MODE":"USING ASSYMETRIC MODE"}, step=tuple())
|
||||
tf.contrib.quantize.create_training_graph(tf.get_default_graph(), quant_delay=params['quant_delay'], use_qdq=params['use_qdq'])
|
||||
|
||||
# Fix for restoring variables during fine-tuning of Resnet-50
|
||||
dllogger.log(data={"MODE": "USING ASSYMETRIC MODE"}, step=tuple())
|
||||
tf.contrib.quantize.create_training_graph(
|
||||
tf.get_default_graph(), quant_delay=params['quant_delay'], use_qdq=params['use_qdq']
|
||||
)
|
||||
|
||||
# Fix for restoring variables during fine-tuning of Resnet
|
||||
if 'finetune_checkpoint' in params.keys():
|
||||
train_vars = tf.trainable_variables()
|
||||
train_var_dict = {}
|
||||
|
@ -220,6 +237,13 @@ class ResnetModel(object):
|
|||
dllogger.log(data={"Restoring variables from checkpoint": params['finetune_checkpoint']}, step=tuple())
|
||||
tf.train.init_from_checkpoint(params['finetune_checkpoint'], train_var_dict)
|
||||
|
||||
with tf.device("/cpu:0"):
|
||||
if hvd_utils.is_using_hvd():
|
||||
sync_var = tf.Variable(initial_value=[0], dtype=tf.int32, name="signal_handler_var")
|
||||
sync_var_assing = sync_var.assign([1], name="signal_handler_var_set")
|
||||
sync_var_reset = sync_var.assign([0], name="signal_handler_var_reset")
|
||||
sync_op = hvd.allreduce(sync_var, op=hvd.Sum, name="signal_handler_all_reduce")
|
||||
|
||||
if mode == tf.estimator.ModeKeys.PREDICT:
|
||||
|
||||
predictions = {'classes': y_preds, 'probabilities': probs}
|
||||
|
@ -239,8 +263,12 @@ class ResnetModel(object):
|
|||
acc_top5 = tf.nn.in_top_k(predictions=logits, targets=labels, k=5)
|
||||
|
||||
else:
|
||||
acc_top1, acc_top1_update_op = tf.metrics.mean(tf.nn.in_top_k(predictions=logits, targets=labels, k=1))
|
||||
acc_top5, acc_top5_update_op = tf.metrics.mean(tf.nn.in_top_k(predictions=logits, targets=labels, k=5))
|
||||
acc_top1, acc_top1_update_op = tf.metrics.mean(
|
||||
tf.nn.in_top_k(predictions=logits, targets=labels, k=1)
|
||||
)
|
||||
acc_top5, acc_top5_update_op = tf.metrics.mean(
|
||||
tf.nn.in_top_k(predictions=logits, targets=labels, k=5)
|
||||
)
|
||||
|
||||
tf.identity(acc_top1, name="acc_top1_ref")
|
||||
tf.identity(acc_top5, name="acc_top5_ref")
|
||||
|
@ -251,20 +279,21 @@ class ResnetModel(object):
|
|||
'accuracy_top1': acc_top1,
|
||||
'accuracy_top5': acc_top5
|
||||
}
|
||||
|
||||
cross_entropy = tf.losses.softmax_cross_entropy(
|
||||
logits=logits, onehot_labels=one_hot_smoothed_labels)
|
||||
|
||||
cross_entropy = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=one_hot_smoothed_labels)
|
||||
|
||||
assert (cross_entropy.dtype == tf.float32)
|
||||
tf.identity(cross_entropy, name='cross_entropy_loss_ref')
|
||||
|
||||
def loss_filter_fn(name):
|
||||
"""we don't need to compute L2 loss for BN and bias (eq. to add a cste)"""
|
||||
return all([
|
||||
tensor_name not in name.lower()
|
||||
# for tensor_name in ["batchnorm", "batch_norm", "batch_normalization", "bias"]
|
||||
for tensor_name in ["batchnorm", "batch_norm", "batch_normalization"]
|
||||
])
|
||||
return all(
|
||||
[
|
||||
tensor_name not in name.lower()
|
||||
# for tensor_name in ["batchnorm", "batch_norm", "batch_normalization", "bias"]
|
||||
for tensor_name in ["batchnorm", "batch_norm", "batch_normalization"]
|
||||
]
|
||||
)
|
||||
|
||||
filtered_params = [tf.cast(v, tf.float32) for v in tf.trainable_variables() if loss_filter_fn(v.name)]
|
||||
|
||||
|
@ -287,7 +316,7 @@ class ResnetModel(object):
|
|||
tf.summary.scalar('cross_entropy', cross_entropy)
|
||||
tf.summary.scalar('l2_loss', l2_loss)
|
||||
tf.summary.scalar('total_loss', total_loss)
|
||||
|
||||
|
||||
if mode == tf.estimator.ModeKeys.TRAIN:
|
||||
|
||||
with tf.device("/cpu:0"):
|
||||
|
@ -317,17 +346,18 @@ class ResnetModel(object):
|
|||
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
|
||||
if mode != tf.estimator.ModeKeys.TRAIN:
|
||||
update_ops += [acc_top1_update_op, acc_top5_update_op]
|
||||
|
||||
|
||||
deterministic = True
|
||||
gate_gradients = (tf.train.Optimizer.GATE_OP if deterministic else tf.train.Optimizer.GATE_NONE)
|
||||
gate_gradients = (tf.compat.v1.train.Optimizer.GATE_OP if deterministic else tf.compat.v1.train.Optimizer.GATE_NONE)
|
||||
|
||||
backprop_op = optimizer.minimize(total_loss, gate_gradients=gate_gradients, global_step=global_step)
|
||||
|
||||
|
||||
if self.model_hparams.use_dali:
|
||||
train_ops = tf.group(backprop_op, update_ops, name='train_ops')
|
||||
else:
|
||||
train_ops = tf.group(backprop_op, cpu_prefetch_op, gpu_prefetch_op, update_ops, name='train_ops')
|
||||
train_ops = tf.group(
|
||||
backprop_op, cpu_prefetch_op, gpu_prefetch_op, update_ops, name='train_ops'
|
||||
)
|
||||
|
||||
return tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, train_op=train_ops)
|
||||
|
||||
|
@ -338,23 +368,18 @@ class ResnetModel(object):
|
|||
}
|
||||
|
||||
return tf.estimator.EstimatorSpec(
|
||||
mode=mode,
|
||||
predictions=predictions,
|
||||
loss=total_loss,
|
||||
eval_metric_ops=eval_metrics
|
||||
mode=mode, predictions=predictions, loss=total_loss, eval_metric_ops=eval_metrics
|
||||
)
|
||||
|
||||
else:
|
||||
raise NotImplementedError('Unknown mode {}'.format(mode))
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _stage(tensors):
|
||||
"""Stages the given tensors in a StagingArea for asynchronous put/get.
|
||||
"""
|
||||
stage_area = tf.contrib.staging.StagingArea(
|
||||
dtypes=[tensor.dtype for tensor in tensors],
|
||||
shapes=[tensor.get_shape() for tensor in tensors]
|
||||
dtypes=[tensor.dtype for tensor in tensors], shapes=[tensor.get_shape() for tensor in tensors]
|
||||
)
|
||||
|
||||
put_op = stage_area.put(tensors)
|
||||
|
@ -364,14 +389,11 @@ class ResnetModel(object):
|
|||
|
||||
return put_op, get_tensors
|
||||
|
||||
|
||||
|
||||
def build_model(self, inputs, training=True, reuse=False, use_final_conv=False):
|
||||
|
||||
|
||||
with var_storage.model_variable_scope(
|
||||
self.model_hparams.model_name,
|
||||
reuse=reuse,
|
||||
dtype=self.model_hparams.dtype):
|
||||
self.model_hparams.model_name, reuse=reuse, dtype=self.model_hparams.dtype
|
||||
):
|
||||
|
||||
with tf.variable_scope("input_reshape"):
|
||||
if self.model_hparams.input_format == 'NHWC' and self.model_hparams.compute_format == 'NCHW':
|
||||
|
@ -426,27 +448,29 @@ class ResnetModel(object):
|
|||
batch_norm_hparams=self.batch_norm_hparams,
|
||||
block_name="btlnck_block_%d_%d" % (block_id, layer_id),
|
||||
use_se=self.model_hparams.use_se,
|
||||
ratio=self.model_hparams.se_ratio)
|
||||
ratio=self.model_hparams.se_ratio
|
||||
)
|
||||
|
||||
with tf.variable_scope("output"):
|
||||
net = layers.reduce_mean(
|
||||
net, keepdims=use_final_conv, data_format=self.model_hparams.compute_format, name='spatial_mean')
|
||||
net, keepdims=False, data_format=self.model_hparams.compute_format, name='spatial_mean'
|
||||
)
|
||||
|
||||
if use_final_conv:
|
||||
logits = layers.conv2d(
|
||||
net,
|
||||
n_channels=self.model_hparams.n_classes,
|
||||
kernel_size=(1, 1),
|
||||
strides=(1, 1),
|
||||
padding='SAME',
|
||||
data_format=self.model_hparams.compute_format,
|
||||
dilation_rate=(1, 1),
|
||||
use_bias=True,
|
||||
kernel_initializer=self.dense_hparams.kernel_initializer,
|
||||
bias_initializer=self.dense_hparams.bias_initializer,
|
||||
trainable=training,
|
||||
name='dense'
|
||||
)
|
||||
net,
|
||||
n_channels=self.model_hparams.n_classes,
|
||||
kernel_size=(1, 1),
|
||||
strides=(1, 1),
|
||||
padding='SAME',
|
||||
data_format=self.model_hparams.compute_format,
|
||||
dilation_rate=(1, 1),
|
||||
use_bias=True,
|
||||
kernel_initializer=self.dense_hparams.kernel_initializer,
|
||||
bias_initializer=self.dense_hparams.bias_initializer,
|
||||
trainable=training,
|
||||
name='dense'
|
||||
)
|
||||
else:
|
||||
logits = layers.dense(
|
||||
inputs=net,
|
||||
|
@ -454,7 +478,8 @@ class ResnetModel(object):
|
|||
use_bias=True,
|
||||
trainable=training,
|
||||
kernel_initializer=self.dense_hparams.kernel_initializer,
|
||||
bias_initializer=self.dense_hparams.bias_initializer)
|
||||
bias_initializer=self.dense_hparams.bias_initializer
|
||||
)
|
||||
|
||||
if logits.dtype != tf.float32:
|
||||
logits = tf.cast(logits, tf.float32)
|
||||
|
@ -464,27 +489,25 @@ class ResnetModel(object):
|
|||
|
||||
return probs, logits
|
||||
|
||||
|
||||
model_architectures = {
|
||||
'resnet50': {
|
||||
'layers': [3, 4, 6, 3],
|
||||
'widths': [64, 128, 256, 512],
|
||||
'expansions': 4,
|
||||
},
|
||||
|
||||
'resnext101-32x4d': {
|
||||
'layers': [3, 4, 23, 3],
|
||||
'widths': [128, 256, 512, 1024],
|
||||
'expansions': 2,
|
||||
'cardinality': 32,
|
||||
},
|
||||
|
||||
'se-resnext101-32x4d' : {
|
||||
'cardinality' : 32,
|
||||
'layers' : [3, 4, 23, 3],
|
||||
'widths' : [128, 256, 512, 1024],
|
||||
'expansions' : 2,
|
||||
'se-resnext101-32x4d': {
|
||||
'cardinality': 32,
|
||||
'layers': [3, 4, 23, 3],
|
||||
'widths': [128, 256, 512, 1024],
|
||||
'expansions': 2,
|
||||
'use_se': True,
|
||||
'se_ratio': 16,
|
||||
},
|
||||
|
||||
}
|
||||
|
|
|
@ -71,4 +71,4 @@ if __name__=='__main__':
|
|||
file.write("model_checkpoint_path: "+ "\"" + new_ckpt + "\"")
|
||||
|
||||
# Process the input checkpoint, apply transforms and generate a new checkpoint.
|
||||
process_checkpoint(input_ckpt, new_ckpt_path, args.dense_layer)
|
||||
process_checkpoint(input_ckpt, new_ckpt_path, args.dense_layer)
|
||||
|
|
|
@ -244,16 +244,16 @@ For example, to train on DGX-1 for 90 epochs using AMP, run:
|
|||
Additionally, features like DALI data preprocessing or TensorFlow XLA can be enabled with
|
||||
following arguments when running those scripts:
|
||||
|
||||
`bash ./resnet50v1.5/training/DGX1_RN50_AMP_90E.sh /path/to/result /data --use_xla --use_dali`
|
||||
`bash ./resnet50v1.5/training/DGX1_RN50_AMP_90E.sh /path/to/result /data --xla --dali`
|
||||
|
||||
7. Start validation/evaluation.
|
||||
To evaluate the validation dataset located in `/data/tfrecords`, run `main.py` with
|
||||
`--mode=evaluate`. For example:
|
||||
|
||||
`python main.py --mode=evaluate --data_dir=/data/tfrecords --batch_size <batch size> --model_dir
|
||||
<model location> --results_dir <output location> [--use_xla] [--use_tf_amp]`
|
||||
<model location> --results_dir <output location> [--xla] [--amp]`
|
||||
|
||||
The optional `--use_xla` and `--use_tf_amp` flags control XLA and AMP during evaluation.
|
||||
The optional `--xla` and `--amp` flags control XLA and AMP during evaluation.
|
||||
|
||||
## Advanced
|
||||
|
||||
|
@ -292,99 +292,116 @@ The `runtime/` directory contains the following module that define the mechanics
|
|||
The script for training and evaluating the ResNet-50 v1.5 model has a variety of parameters that control these processes.
|
||||
|
||||
```
|
||||
usage: main.py [-h]
|
||||
[--arch {resnet50,resnext101-32x4d,se-resnext101-32x4d}]
|
||||
usage: main.py [-h] [--arch {resnet50,resnext101-32x4d,se-resnext101-32x4d}]
|
||||
[--mode {train,train_and_evaluate,evaluate,predict,training_benchmark,inference_benchmark}]
|
||||
[--data_dir DATA_DIR] [--data_idx_dir DATA_IDX_DIR]
|
||||
[--export_dir EXPORT_DIR] [--to_predict TO_PREDICT]
|
||||
[--batch_size BATCH_SIZE] [--num_iter NUM_ITER]
|
||||
[--iter_unit {epoch,batch}] [--warmup_steps WARMUP_STEPS]
|
||||
[--model_dir MODEL_DIR] [--results_dir RESULTS_DIR]
|
||||
[--log_filename LOG_FILENAME] [--display_every DISPLAY_EVERY]
|
||||
[--lr_init LR_INIT] [--lr_warmup_epochs LR_WARMUP_EPOCHS]
|
||||
[--weight_decay WEIGHT_DECAY] [--weight_init {fan_in,fan_out}]
|
||||
[--momentum MOMENTUM] [--loss_scale LOSS_SCALE]
|
||||
[--label_smoothing LABEL_SMOOTHING] [--mixup MIXUP]
|
||||
[--use_static_loss_scaling | --nouse_static_loss_scaling]
|
||||
[--use_xla | --nouse_xla] [--use_dali | --nouse_dali]
|
||||
[--use_tf_amp | --nouse_tf_amp]
|
||||
[--use_cosine_lr | --nouse_cosine_lr] [--seed SEED]
|
||||
[--export_dir EXPORT_DIR] [--to_predict TO_PREDICT]
|
||||
--batch_size BATCH_SIZE [--num_iter NUM_ITER]
|
||||
[--run_iter RUN_ITER] [--iter_unit {epoch,batch}]
|
||||
[--warmup_steps WARMUP_STEPS] [--model_dir MODEL_DIR]
|
||||
[--results_dir RESULTS_DIR] [--log_filename LOG_FILENAME]
|
||||
[--display_every DISPLAY_EVERY] [--seed SEED]
|
||||
[--gpu_memory_fraction GPU_MEMORY_FRACTION] [--gpu_id GPU_ID]
|
||||
|
||||
JoC-RN50v1.5-TF
|
||||
|
||||
optional arguments:
|
||||
-h, --help Show this help message and exit
|
||||
[--finetune_checkpoint FINETUNE_CHECKPOINT] [--use_final_conv]
|
||||
[--quant_delay QUANT_DELAY] [--quantize] [--use_qdq]
|
||||
[--symmetric] [--data_dir DATA_DIR]
|
||||
[--data_idx_dir DATA_IDX_DIR] [--dali]
|
||||
[--synthetic_data_size SYNTHETIC_DATA_SIZE] [--lr_init LR_INIT]
|
||||
[--lr_warmup_epochs LR_WARMUP_EPOCHS]
|
||||
[--weight_decay WEIGHT_DECAY] [--weight_init {fan_in,fan_out}]
|
||||
[--momentum MOMENTUM] [--label_smoothing LABEL_SMOOTHING]
|
||||
[--mixup MIXUP] [--cosine_lr] [--xla]
|
||||
[--data_format {NHWC,NCHW}] [--amp]
|
||||
[--static_loss_scale STATIC_LOSS_SCALE]
|
||||
|
||||
JoC-RN50v1.5-TF
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit.
|
||||
--arch {resnet50,resnext101-32x4d,se-resnext101-32x4d}
|
||||
Architecture of model to run (default is resnet50)
|
||||
Architecture of model to run.
|
||||
--mode {train,train_and_evaluate,evaluate,predict,training_benchmark,inference_benchmark}
|
||||
The execution mode of the script.
|
||||
--export_dir EXPORT_DIR
|
||||
Directory in which to write exported SavedModel.
|
||||
--to_predict TO_PREDICT
|
||||
Path to file or directory of files to run prediction
|
||||
on.
|
||||
--batch_size BATCH_SIZE
|
||||
Size of each minibatch per GPU.
|
||||
--num_iter NUM_ITER Number of iterations to run.
|
||||
--run_iter RUN_ITER Number of training iterations to run on single run.
|
||||
--iter_unit {epoch,batch}
|
||||
Unit of iterations.
|
||||
--warmup_steps WARMUP_STEPS
|
||||
Number of steps considered as warmup and not taken
|
||||
into account for performance measurements.
|
||||
--model_dir MODEL_DIR
|
||||
Directory in which to write model. If undefined,
|
||||
results dir will be used.
|
||||
--results_dir RESULTS_DIR
|
||||
Directory in which to write training logs, summaries
|
||||
and checkpoints.
|
||||
--log_filename LOG_FILENAME
|
||||
Name of the JSON file to which write the training log.
|
||||
--display_every DISPLAY_EVERY
|
||||
How often (in batches) to print out running
|
||||
information.
|
||||
--seed SEED Random seed.
|
||||
--gpu_memory_fraction GPU_MEMORY_FRACTION
|
||||
Limit memory fraction used by training script for DALI.
|
||||
--gpu_id GPU_ID Specify ID of the target GPU on multi-device platform.
|
||||
Effective only for single-GPU mode.
|
||||
--finetune_checkpoint FINETUNE_CHECKPOINT
|
||||
Path to pre-trained checkpoint which will be used for
|
||||
fine-tuning.
|
||||
--use_final_conv Use convolution operator instead of MLP as last layer.
|
||||
--quant_delay QUANT_DELAY
|
||||
Number of steps to be run before quantization starts
|
||||
to happen.
|
||||
--quantize Quantize weights and activations during training.
|
||||
(Defaults to Assymmetric quantization)
|
||||
--use_qdq Use QDQV3 op instead of FakeQuantWithMinMaxVars op for
|
||||
quantization. QDQv3 does only scaling.
|
||||
--symmetric Quantize weights and activations during training using
|
||||
symmetric quantization.
|
||||
|
||||
Dataset arguments:
|
||||
--data_dir DATA_DIR Path to dataset in TFRecord format. Files should be
|
||||
named 'train-*' and 'validation-*'.
|
||||
--data_idx_dir DATA_IDX_DIR
|
||||
Path to index files for DALI. Files should be named
|
||||
'train-*' and 'validation-*'.
|
||||
--export_dir EXPORT_DIR
|
||||
Directory in which to write exported SavedModel.
|
||||
--to_predict TO_PREDICT
|
||||
Path to file or directory of files to run prediction
|
||||
on.
|
||||
--batch_size BATCH_SIZE
|
||||
Size of each minibatch per GPU.
|
||||
--num_iter NUM_ITER Number of iterations to run.
|
||||
--iter_unit {epoch,batch}
|
||||
Unit of iterations.
|
||||
--warmup_steps WARMUP_STEPS
|
||||
Number of steps considered as warmup and not taken
|
||||
into account for performance measurements.
|
||||
--model_dir MODEL_DIR
|
||||
Directory in which to write the model. If undefined,
|
||||
results directory will be used.
|
||||
--results_dir RESULTS_DIR
|
||||
Directory in which to write training logs, summaries
|
||||
and checkpoints.
|
||||
--log_filename LOG_FILENAME
|
||||
Name of the JSON file to which write the training log
|
||||
--display_every DISPLAY_EVERY
|
||||
How often (in batches) to print out running
|
||||
information.
|
||||
--dali Enable DALI data input.
|
||||
--synthetic_data_size SYNTHETIC_DATA_SIZE
|
||||
Dimension of image for synthetic dataset.
|
||||
|
||||
Training arguments:
|
||||
--lr_init LR_INIT Initial value for the learning rate.
|
||||
--lr_warmup_epochs LR_WARMUP_EPOCHS
|
||||
Number of warmup epochs for the learning rate schedule.
|
||||
Number of warmup epochs for learning rate schedule.
|
||||
--weight_decay WEIGHT_DECAY
|
||||
Weight Decay scale factor.
|
||||
--weight_init {fan_in,fan_out}
|
||||
Model weight initialization method.
|
||||
--momentum MOMENTUM SGD momentum value for the momentum optimizer.
|
||||
--loss_scale LOSS_SCALE
|
||||
Loss scale for FP16 training and fast math FP32.
|
||||
--momentum MOMENTUM SGD momentum value for the Momentum optimizer.
|
||||
--label_smoothing LABEL_SMOOTHING
|
||||
The value of label smoothing.
|
||||
--mixup MIXUP The alpha parameter for mixup (if 0 then mixup is not
|
||||
applied).
|
||||
--use_static_loss_scaling
|
||||
Use static loss scaling in FP16 or FP32 AMP.
|
||||
--nouse_static_loss_scaling
|
||||
--use_xla Enable XLA (Accelerated Linear Algebra) computation
|
||||
--cosine_lr Use cosine learning rate schedule.
|
||||
|
||||
Generic optimization arguments:
|
||||
--xla Enable XLA (Accelerated Linear Algebra) computation
|
||||
for improved performance.
|
||||
--nouse_xla
|
||||
--use_dali Enable DALI data input.
|
||||
--nouse_dali
|
||||
--use_tf_amp Enable AMP to speedup FP32
|
||||
computation using Tensor Cores.
|
||||
--nouse_tf_amp
|
||||
--use_cosine_lr Use cosine learning rate schedule.
|
||||
--nouse_cosine_lr
|
||||
--seed SEED Random seed.
|
||||
--gpu_memory_fraction GPU_MEMORY_FRACTION
|
||||
Limit memory fraction used by the training script for DALI
|
||||
--gpu_id GPU_ID Specify the ID of the target GPU on a multi-device platform.
|
||||
Effective only for single-GPU mode.
|
||||
--quantize Used to add quantization nodes in the graph (Default: Asymmetric quantization)
|
||||
--symmetric If --quantize mode is used, this option enables symmetric quantization
|
||||
--use_qdq Use quantize_and_dequantize (QDQ) op instead of FakeQuantWithMinMaxVars op for quantization. QDQ does only scaling.
|
||||
--finetune_checkpoint Path to pre-trained checkpoint which can be used for fine-tuning
|
||||
--quant_delay Number of steps to be run before quantization starts to happen
|
||||
--data_format {NHWC,NCHW}
|
||||
Data format used to do calculations.
|
||||
--amp Enable Automatic Mixed Precision to speedup
|
||||
computation using tensor cores.
|
||||
|
||||
Automatic Mixed Precision arguments:
|
||||
--static_loss_scale STATIC_LOSS_SCALE
|
||||
Use static loss scaling in FP32 AMP.
|
||||
|
||||
```
|
||||
|
||||
### Quantization Aware Training
|
||||
|
@ -424,12 +441,13 @@ Arguments:
|
|||
* `--input_format` : Data format of input tensor (Default: NCHW). Use NCHW format to optimize the graph with TensorRT.
|
||||
* `--compute_format` : Data format of the operations in the network (Default: NCHW). Use NCHW format to optimize the graph with TensorRT.
|
||||
|
||||
|
||||
### Inference process
|
||||
To run inference on a single example with a checkpoint and a model script, use:
|
||||
|
||||
`python main.py --mode predict --model_dir <path to model> --to_predict <path to image> --results_dir <path to results>`
|
||||
|
||||
The optional `--use_xla` and `--use_tf_amp` flags control XLA and AMP during inference.
|
||||
The optional `--xla` and `--amp` flags control XLA and AMP during inference.
|
||||
|
||||
## Performance
|
||||
|
||||
|
@ -448,7 +466,7 @@ To benchmark the training performance on a specific batch size, run:
|
|||
|
||||
* AMP
|
||||
|
||||
`python ./main.py --mode=training_benchmark --use_tf_amp --warmup_steps 200 --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
`python ./main.py --mode=training_benchmark --amp --warmup_steps 200 --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
|
||||
* For multiple GPUs
|
||||
* FP32 / TF32
|
||||
|
@ -457,16 +475,18 @@ To benchmark the training performance on a specific batch size, run:
|
|||
|
||||
* AMP
|
||||
|
||||
`mpiexec --allow-run-as-root --bind-to socket -np <num_gpus> python ./main.py --mode=training_benchmark --use_tf_amp --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
`mpiexec --allow-run-as-root --bind-to socket -np <num_gpus> python ./main.py --mode=training_benchmark --amp --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
|
||||
|
||||
Each of these scripts runs 200 warm-up iterations and measures the first epoch.
|
||||
|
||||
To control warmup and benchmark length, use the `--warmup_steps`, `--num_iter` and `--iter_unit` flags. Features like XLA or DALI can be controlled
|
||||
with `--use_xla` and `--use_dali` flags. If no `--data_dir=<path to imagenet>` flag is specified then the benchmarks will use a synthetic dataset.
|
||||
For proper throughput reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
with the `--xla` and `--dali` flags. For proper throughput reporting, the value of `--num_iter` must be greater than the value of `--warmup_steps`.
|
||||
Suggested batch sizes for training are 256 for mixed precision training and 128 for single precision training per single V100 16 GB.
|
||||
|
||||
If no `--data_dir=<path to imagenet>` flag is specified, the benchmarks will use a synthetic dataset. The resolution of the synthetic images can be controlled with the `--synthetic_data_size` flag.
|
||||
|
||||
|
||||
#### Inference performance benchmark
|
||||
|
||||
To benchmark the inference performance on a specific batch size, run:
|
||||
|
@ -477,11 +497,10 @@ To benchmark the inference performance on a specific batch size, run:
|
|||
|
||||
* AMP
|
||||
|
||||
`python ./main.py --mode=inference_benchmark --use_tf_amp --warmup_steps 20 --num_iter 100 --iter_unit batch --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
`python ./main.py --mode=inference_benchmark --amp --warmup_steps 20 --num_iter 100 --iter_unit batch --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
|
||||
By default, each of these scripts runs 20 warm-up iterations and measures the next 80 iterations.
|
||||
To control warm-up and benchmark length, use the `--warmup_steps`, `--num_iter` and `--iter_unit` flags.
|
||||
For proper throughput and latency reporting, the value of `--num_iter` must be greater than the value of `--warmup_steps`.
|
||||
If no `--data_dir=<path to imagenet>` flag is specified then the benchmarks will use a synthetic dataset.
|
||||
|
||||
The benchmark can be automated with the `inference_benchmark.sh` script provided in `resnet50v1.5`, by simply running:
|
||||
|
@ -490,6 +509,9 @@ The benchmark can be automated with the `inference_benchmark.sh` script provided
|
|||
The `<data dir>` parameter refers to the input data directory (by default `/data/tfrecords` inside the container).
|
||||
By default, the benchmark tests the following configurations: **FP32**, **AMP**, **AMP + XLA** with different batch sizes.
|
||||
When the optional directory with the DALI index files `<data idx dir>` is specified, the benchmark executes an additional **DALI + AMP + XLA** configuration.
|
||||
For proper throughput reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
|
||||
To benchmark the performance of the raw model, a synthetic dataset can be used. To use a synthetic dataset, pass the `--synthetic_data_size` flag instead of `--data_dir` to specify the input image size.
|
||||
|
||||
### Results
|
||||
|
||||
|
@ -568,17 +590,6 @@ on NVIDIA DGX A100 with (8x A100 40G) GPUs.
|
|||
| 8 | ~2h | ~5h |
|
||||
|
||||
|
||||
##### Training time: NVIDIA DGX A100 (8x A100 40GB)
|
||||
|
||||
Our results were estimated based on the [training performance results](#training-performance-nvidia-dgx-a100-8x-a100-40g)
|
||||
on NVIDIA DGX A100 with (8x A100 40G) GPUs.
|
||||
|
||||
| GPUs | Time to train - mixed precision + XLA | Time to train - mixed precision | Time to train - TF32 + XLA | Time to train - TF32 |
|
||||
|---|--------|---------|---------|-------|
|
||||
| 1 | ~18h | ~19.5h | ~40h | ~47h |
|
||||
| 8 | ~2h | ~2.5h | ~5h | ~6h |
|
||||
|
||||
|
||||
##### Training time: NVIDIA DGX-1 (8x V100 16G)
|
||||
|
||||
Our results were estimated based on the [training performance results](#training-performance-nvidia-dgx-1-8x-v100-16g)
|
||||
|
@ -821,22 +832,25 @@ on NVIDIA T4 with (1x T4 16G) GPU.
|
|||
* Added benchmark results for DGX-2 and XLA-enabled DGX-1 and DGX-2.
|
||||
3. July, 2019
|
||||
* Added Cosine learning rate schedule
|
||||
3. August, 2019
|
||||
4. August, 2019
|
||||
* Added mixup regularization
|
||||
* Added T4 benchmarks
|
||||
* Improved inference capabilities
|
||||
* Added SavedModel export
|
||||
4. January, 2020
|
||||
5. January, 2020
|
||||
* Removed manual checks for dataset paths to facilitate cloud storage solutions
|
||||
* Move to a new logging solution
|
||||
* Bump base docker image version
|
||||
5. March, 2020
|
||||
6. March, 2020
|
||||
* Code cleanup and refactor
|
||||
* Improved training process
|
||||
6. June, 2020
|
||||
7. June, 2020
|
||||
* Added benchmark results for DGX-A100
|
||||
* Updated benchmark results for DGX-1, DGX-2 and T4
|
||||
* Updated base docker image version
|
||||
8. August, 2020
|
||||
* Updated command line argument names
|
||||
* Added support for synthetic dataset with different image size
|
||||
|
||||
### Known issues
|
||||
Performance without XLA enabled is low. We recommend using XLA.
|
||||
Performance without XLA enabled is low due to a BN + ReLU fusion bug.
|
||||
|
|
|
@ -22,12 +22,12 @@ function test_configuration() {
|
|||
}
|
||||
|
||||
test_configuration "FP32 nodali noxla"
|
||||
test_configuration "FP32 nodali xla" "--use_xla"
|
||||
test_configuration "FP16 nodali noxla" "--use_tf_amp"
|
||||
test_configuration "FP16 nodali xla" "--use_tf_amp --use_xla"
|
||||
test_configuration "FP32 nodali xla" "--xla"
|
||||
test_configuration "FP16 nodali noxla" "--amp"
|
||||
test_configuration "FP16 nodali xla" "--amp --xla"
|
||||
|
||||
if [ ! -z $DALI_DIR ]; then
|
||||
test_configuration "FP16 dali xla" "--use_tf_amp --use_xla --use_dali --data_idx_dir ${DALI_DIR}"
|
||||
test_configuration "FP16 dali xla" "--amp --xla --dali --data_idx_dir ${DALI_DIR}"
|
||||
fi
|
||||
|
||||
cat $INFERENCE_BENCHMARK
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --muxup=0.2 \
|
||||
--batch_size=256 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=256 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=256 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=256 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --muxup=0.2 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --muxup=0.2 \
|
||||
--batch_size=256 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=256 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 16 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=256 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=256 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --muxup=0.2 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 16 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=256 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=256 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnet50 \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=256 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=256 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script does Quantization aware training of Resnet-50 by finetuning on the pre-trained model using 1 GPU and a batch size of 32.
|
||||
# Usage ./GPU1_RN50_QAT.sh <path to the pre-trained model> <path to dataset> <path to results directory>
|
||||
|
||||
python main.py --mode=train_and_evaluate --batch_size=32 --lr_warmup_epochs=1 --quantize --symmetric --use_qdq --label_smoothing 0.1 --lr_init=0.00005 --momentum=0.875 --weight_decay=3.0517578125e-05 --finetune_checkpoint=$1 --data_dir=$2 --results_dir=$3 --num_iter 10 --data_format NHWC
|
|
@ -26,13 +26,13 @@ function run_benchmark() {
|
|||
MODE_SIZE=$2
|
||||
|
||||
if [[ $4 -eq "1" ]]; then
|
||||
XLA="--use_xla"
|
||||
XLA="--xla"
|
||||
else
|
||||
XLA=""
|
||||
fi
|
||||
|
||||
case $2 in
|
||||
"amp") MODE_FLAGS="--use_tf_amp --use_static_loss_scaling --loss_scale=128";;
|
||||
"amp") MODE_FLAGS="--amp --static_loss_scale 128";;
|
||||
"fp32"|"tf32") MODE_FLAGS="";;
|
||||
*) echo "Unsupported configuration, use amp, tf32 or fp32";;
|
||||
esac
|
||||
|
|
|
@ -251,16 +251,16 @@ For example, to train on DGX-1 for 90 epochs using AMP, run:
|
|||
Additionally, features like DALI data preprocessing or TensorFlow XLA can be enabled with
|
||||
following arguments when running those scripts:
|
||||
|
||||
`bash ./resnext101-32x4d/training/DGX1_RNxt101-32x4d_AMP_90E.sh /path/to/result /data --use_xla --use_dali`
|
||||
`bash ./resnext101-32x4d/training/DGX1_RNxt101-32x4d_AMP_90E.sh /path/to/result /data --xla --dali`
|
||||
|
||||
7. Start validation/evaluation.
|
||||
To evaluate the validation dataset located in `/data/tfrecords`, run `main.py` with
|
||||
`--mode=evaluate`. For example:
|
||||
|
||||
`python main.py --arch=resnext101-32x4d --mode=evaluate --data_dir=/data/tfrecords --batch_size <batch size> --model_dir
|
||||
<model location> --results_dir <output location> [--use_xla] [--use_tf_amp]`
|
||||
<model location> --results_dir <output location> [--xla] [--amp]`
|
||||
|
||||
The optional `--use_xla` and `--use_tf_amp` flags control XLA and AMP during evaluation.
|
||||
The optional `--xla` and `--amp` flags control XLA and AMP during evaluation.
|
||||
|
||||
## Advanced
|
||||
|
||||
|
@ -299,95 +299,116 @@ The `runtime/` directory contains the following module that define the mechanics
|
|||
The script for training and evaluating the ResNext101-32x4d model has a variety of parameters that control these processes.
|
||||
|
||||
```
|
||||
usage: main.py [-h]
|
||||
[--arch {resnet50,resnext101-32x4d,se-resnext101-32x4d}]
|
||||
usage: main.py [-h] [--arch {resnet50,resnext101-32x4d,se-resnext101-32x4d}]
|
||||
[--mode {train,train_and_evaluate,evaluate,predict,training_benchmark,inference_benchmark}]
|
||||
[--data_dir DATA_DIR] [--data_idx_dir DATA_IDX_DIR]
|
||||
[--export_dir EXPORT_DIR] [--to_predict TO_PREDICT]
|
||||
[--batch_size BATCH_SIZE] [--num_iter NUM_ITER]
|
||||
[--iter_unit {epoch,batch}] [--warmup_steps WARMUP_STEPS]
|
||||
[--model_dir MODEL_DIR] [--results_dir RESULTS_DIR]
|
||||
[--log_filename LOG_FILENAME] [--display_every DISPLAY_EVERY]
|
||||
[--lr_init LR_INIT] [--lr_warmup_epochs LR_WARMUP_EPOCHS]
|
||||
[--weight_decay WEIGHT_DECAY] [--weight_init {fan_in,fan_out}]
|
||||
[--momentum MOMENTUM] [--loss_scale LOSS_SCALE]
|
||||
[--label_smoothing LABEL_SMOOTHING] [--mixup MIXUP]
|
||||
[--use_static_loss_scaling | --nouse_static_loss_scaling]
|
||||
[--use_xla | --nouse_xla] [--use_dali | --nouse_dali]
|
||||
[--use_tf_amp | --nouse_tf_amp]
|
||||
[--use_cosine_lr | --nouse_cosine_lr] [--seed SEED]
|
||||
[--export_dir EXPORT_DIR] [--to_predict TO_PREDICT]
|
||||
--batch_size BATCH_SIZE [--num_iter NUM_ITER]
|
||||
[--run_iter RUN_ITER] [--iter_unit {epoch,batch}]
|
||||
[--warmup_steps WARMUP_STEPS] [--model_dir MODEL_DIR]
|
||||
[--results_dir RESULTS_DIR] [--log_filename LOG_FILENAME]
|
||||
[--display_every DISPLAY_EVERY] [--seed SEED]
|
||||
[--gpu_memory_fraction GPU_MEMORY_FRACTION] [--gpu_id GPU_ID]
|
||||
|
||||
JoC-RN50v1.5-TF
|
||||
|
||||
optional arguments:
|
||||
-h, --help Show this help message and exit
|
||||
[--finetune_checkpoint FINETUNE_CHECKPOINT] [--use_final_conv]
|
||||
[--quant_delay QUANT_DELAY] [--quantize] [--use_qdq]
|
||||
[--symmetric] [--data_dir DATA_DIR]
|
||||
[--data_idx_dir DATA_IDX_DIR] [--dali]
|
||||
[--synthetic_data_size SYNTHETIC_DATA_SIZE] [--lr_init LR_INIT]
|
||||
[--lr_warmup_epochs LR_WARMUP_EPOCHS]
|
||||
[--weight_decay WEIGHT_DECAY] [--weight_init {fan_in,fan_out}]
|
||||
[--momentum MOMENTUM] [--label_smoothing LABEL_SMOOTHING]
|
||||
[--mixup MIXUP] [--cosine_lr] [--xla]
|
||||
[--data_format {NHWC,NCHW}] [--amp]
|
||||
[--static_loss_scale STATIC_LOSS_SCALE]
|
||||
|
||||
JoC-RN50v1.5-TF
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit.
|
||||
--arch {resnet50,resnext101-32x4d,se-resnext101-32x4d}
|
||||
Architecture of model to run (to run Resnext-32x4d set
|
||||
--arch=resnext101-32x4d)
|
||||
Architecture of model to run.
|
||||
--mode {train,train_and_evaluate,evaluate,predict,training_benchmark,inference_benchmark}
|
||||
The execution mode of the script.
|
||||
--export_dir EXPORT_DIR
|
||||
Directory in which to write exported SavedModel.
|
||||
--to_predict TO_PREDICT
|
||||
Path to file or directory of files to run prediction
|
||||
on.
|
||||
--batch_size BATCH_SIZE
|
||||
Size of each minibatch per GPU.
|
||||
--num_iter NUM_ITER Number of iterations to run.
|
||||
--run_iter RUN_ITER Number of training iterations to run on single run.
|
||||
--iter_unit {epoch,batch}
|
||||
Unit of iterations.
|
||||
--warmup_steps WARMUP_STEPS
|
||||
Number of steps considered as warmup and not taken
|
||||
into account for performance measurements.
|
||||
--model_dir MODEL_DIR
|
||||
Directory in which to write model. If undefined,
|
||||
results dir will be used.
|
||||
--results_dir RESULTS_DIR
|
||||
Directory in which to write training logs, summaries
|
||||
and checkpoints.
|
||||
--log_filename LOG_FILENAME
|
||||
Name of the JSON file to which write the training log.
|
||||
--display_every DISPLAY_EVERY
|
||||
How often (in batches) to print out running
|
||||
information.
|
||||
--seed SEED Random seed.
|
||||
--gpu_memory_fraction GPU_MEMORY_FRACTION
|
||||
Limit memory fraction used by training script for DALI.
|
||||
--gpu_id GPU_ID Specify ID of the target GPU on multi-device platform.
|
||||
Effective only for single-GPU mode.
|
||||
--finetune_checkpoint FINETUNE_CHECKPOINT
|
||||
Path to pre-trained checkpoint which will be used for
|
||||
fine-tuning.
|
||||
--use_final_conv Use convolution operator instead of MLP as last layer.
|
||||
--quant_delay QUANT_DELAY
|
||||
Number of steps to be run before quantization starts
|
||||
to happen.
|
||||
--quantize Quantize weights and activations during training.
|
||||
(Defaults to Asymmetric quantization)
|
||||
--use_qdq Use QDQV3 op instead of FakeQuantWithMinMaxVars op for
|
||||
quantization. QDQv3 does only scaling.
|
||||
--symmetric Quantize weights and activations during training using
|
||||
symmetric quantization.
|
||||
|
||||
Dataset arguments:
|
||||
--data_dir DATA_DIR Path to dataset in TFRecord format. Files should be
|
||||
named 'train-*' and 'validation-*'.
|
||||
--data_idx_dir DATA_IDX_DIR
|
||||
Path to index files for DALI. Files should be named
|
||||
'train-*' and 'validation-*'.
|
||||
--export_dir EXPORT_DIR
|
||||
Directory in which to write exported SavedModel.
|
||||
--to_predict TO_PREDICT
|
||||
Path to file or directory of files to run prediction
|
||||
on.
|
||||
--batch_size BATCH_SIZE
|
||||
Size of each minibatch per GPU.
|
||||
--num_iter NUM_ITER Number of iterations to run.
|
||||
--iter_unit {epoch,batch}
|
||||
Unit of iterations.
|
||||
--warmup_steps WARMUP_STEPS
|
||||
Number of steps considered as warmup and not taken
|
||||
into account for performance measurements.
|
||||
--model_dir MODEL_DIR
|
||||
Directory in which to write the model. If undefined,
|
||||
results directory will be used.
|
||||
--results_dir RESULTS_DIR
|
||||
Directory in which to write training logs, summaries
|
||||
and checkpoints.
|
||||
--log_filename LOG_FILENAME
|
||||
Name of the JSON file to which write the training log
|
||||
--display_every DISPLAY_EVERY
|
||||
How often (in batches) to print out running
|
||||
information.
|
||||
--dali Enable DALI data input.
|
||||
--synthetic_data_size SYNTHETIC_DATA_SIZE
|
||||
Dimension of image for synthetic dataset.
|
||||
|
||||
Training arguments:
|
||||
--lr_init LR_INIT Initial value for the learning rate.
|
||||
--lr_warmup_epochs LR_WARMUP_EPOCHS
|
||||
Number of warmup epochs for the learning rate schedule.
|
||||
Number of warmup epochs for learning rate schedule.
|
||||
--weight_decay WEIGHT_DECAY
|
||||
Weight Decay scale factor.
|
||||
--weight_init {fan_in,fan_out}
|
||||
Model weight initialization method.
|
||||
--momentum MOMENTUM SGD momentum value for the momentum optimizer.
|
||||
--loss_scale LOSS_SCALE
|
||||
Loss scale for FP16 training and fast math FP32.
|
||||
--momentum MOMENTUM SGD momentum value for the Momentum optimizer.
|
||||
--label_smoothing LABEL_SMOOTHING
|
||||
The value of label smoothing.
|
||||
--mixup MIXUP The alpha parameter for mixup (if 0 then mixup is not
|
||||
applied).
|
||||
--use_static_loss_scaling
|
||||
Use static loss scaling in FP16 or FP32 AMP.
|
||||
--nouse_static_loss_scaling
|
||||
--use_xla Enable XLA (Accelerated Linear Algebra) computation
|
||||
--cosine_lr Use cosine learning rate schedule.
|
||||
|
||||
Generic optimization arguments:
|
||||
--xla Enable XLA (Accelerated Linear Algebra) computation
|
||||
for improved performance.
|
||||
--nouse_xla
|
||||
--use_dali Enable DALI data input.
|
||||
--nouse_dali
|
||||
--use_tf_amp Enable AMP to speedup FP32
|
||||
computation using Tensor Cores.
|
||||
--nouse_tf_amp
|
||||
--use_cosine_lr Use cosine learning rate schedule.
|
||||
--nouse_cosine_lr
|
||||
--seed SEED Random seed.
|
||||
--gpu_memory_fraction GPU_MEMORY_FRACTION
|
||||
Limit memory fraction used by the training script for DALI
|
||||
--gpu_id GPU_ID Specify the ID of the target GPU on a multi-device platform.
|
||||
Effective only for single-GPU mode.
|
||||
--data_format {NHWC,NCHW}
|
||||
Data format used to do calculations.
|
||||
--amp Enable Automatic Mixed Precision to speedup
|
||||
computation using tensor cores.
|
||||
|
||||
Automatic Mixed Precision arguments:
|
||||
--static_loss_scale STATIC_LOSS_SCALE
|
||||
Use static loss scaling in FP32 AMP.
|
||||
|
||||
```
|
||||
|
||||
### Inference process
|
||||
|
@ -395,7 +416,7 @@ To run inference on a single example with a checkpoint and a model script, use:
|
|||
|
||||
`python main.py --arch=resnext101-32x4d --mode predict --model_dir <path to model> --to_predict <path to image> --results_dir <path to results>`
|
||||
|
||||
The optional `--use_xla` and `--use_tf_amp` flags control XLA and AMP during inference.
|
||||
The optional `--xla` and `--amp` flags control XLA and AMP during inference.
|
||||
|
||||
## Performance
|
||||
|
||||
|
@ -414,7 +435,7 @@ To benchmark the training performance on a specific batch size, run:
|
|||
|
||||
* AMP
|
||||
|
||||
`python ./main.py --arch=resnext101-32x4d --mode=training_benchmark --use_tf_amp --warmup_steps 200 --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
`python ./main.py --arch=resnext101-32x4d --mode=training_benchmark --amp --warmup_steps 200 --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
|
||||
* For multiple GPUs
|
||||
* FP32 / TF32
|
||||
|
@ -423,16 +444,16 @@ To benchmark the training performance on a specific batch size, run:
|
|||
|
||||
* AMP
|
||||
|
||||
`mpiexec --allow-run-as-root --bind-to socket -np <num_gpus> python ./main.py --arch=resnext101-32x4d --mode=training_benchmark --use_tf_amp --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
`mpiexec --allow-run-as-root --bind-to socket -np <num_gpus> python ./main.py --arch=resnext101-32x4d --mode=training_benchmark --amp --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
|
||||
|
||||
Each of these scripts runs 200 warm-up iterations and measures the first epoch.
|
||||
|
||||
To control warmup and benchmark length, use the `--warmup_steps`, `--num_iter` and `--iter_unit` flags. Features like XLA or DALI can be controlled
|
||||
with `--use_xla` and `--use_dali` flags. If no `--data_dir=<path to imagenet>` flag is specified then the benchmarks will use a synthetic dataset.
|
||||
For proper throughput reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
with `--xla` and `--dali` flags. For proper throughput reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
Suggested batch sizes for training are 128 for mixed precision training and 64 for single precision training per single V100 16 GB.
|
||||
|
||||
If no `--data_dir=<path to imagenet>` flag is specified then the benchmarks will use a synthetic dataset. The resolution of synthetic images used can be controlled with `--synthetic_data_size` flag.
|
||||
|
||||
#### Inference performance benchmark
|
||||
|
||||
|
@ -444,11 +465,10 @@ To benchmark the inference performance on a specific batch size, run:
|
|||
|
||||
* AMP
|
||||
|
||||
`python ./main.py --arch=resnext101-32x4d --mode=inference_benchmark --use_tf_amp --warmup_steps 20 --num_iter 100 --iter_unit batch --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
`python ./main.py --arch=resnext101-32x4d --mode=inference_benchmark --amp --warmup_steps 20 --num_iter 100 --iter_unit batch --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
|
||||
By default, each of these scripts runs 20 warm-up iterations and measures the next 80 iterations.
|
||||
To control warm-up and benchmark length, use the `--warmup_steps`, `--num_iter` and `--iter_unit` flags.
|
||||
For proper throughput and latency reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
If no `--data_dir=<path to imagenet>` flag is specified then the benchmarks will use a synthetic dataset.
|
||||
|
||||
The benchmark can be automated with the `inference_benchmark.sh` script provided in `resnext101-32x4d`, by simply running:
|
||||
|
@ -457,6 +477,9 @@ The benchmark can be automated with the `inference_benchmark.sh` script provided
|
|||
The `<data dir>` parameter refers to the input data directory (by default `/data/tfrecords` inside the container).
|
||||
By default, the benchmark tests the following configurations: **FP32**, **AMP**, **AMP + XLA** with different batch sizes.
|
||||
When the optional directory with the DALI index files `<data idx dir>` is specified, the benchmark executes an additional **DALI + AMP + XLA** configuration.
|
||||
For proper throughput reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
|
||||
To benchmark the performance of the raw model, a synthetic dataset can be used. To use a synthetic dataset, pass the `--synthetic_data_size` flag instead of `--data_dir` to specify the input image size.
|
||||
|
||||
### Results
|
||||
|
||||
|
@ -769,6 +792,9 @@ on NVIDIA T4 with (1x T4 16G) GPU.
|
|||
|
||||
June 2020
|
||||
- Initial release
|
||||
August 2020
|
||||
- Updated command line argument names
|
||||
- Added support for synthetic dataset with different image size
|
||||
|
||||
### Known issues
|
||||
Performance without XLA enabled is low. We recommend using XLA.
|
||||
Performance without XLA enabled is low due to a BN + ReLU fusion bug.
|
||||
|
|
|
@ -22,12 +22,12 @@ function test_configuration() {
|
|||
}
|
||||
|
||||
test_configuration "FP32 nodali noxla"
|
||||
test_configuration "FP32 nodali xla" "--use_xla"
|
||||
test_configuration "FP16 nodali noxla" "--use_tf_amp"
|
||||
test_configuration "FP16 nodali xla" "--use_tf_amp --use_xla"
|
||||
test_configuration "FP32 nodali xla" "--xla"
|
||||
test_configuration "FP16 nodali noxla" "--amp"
|
||||
test_configuration "FP16 nodali xla" "--amp --xla"
|
||||
|
||||
if [ ! -z $DALI_DIR ]; then
|
||||
test_configuration "FP16 dali xla" "--use_tf_amp --use_xla --use_dali --data_idx_dir ${DALI_DIR}"
|
||||
test_configuration "FP16 dali xla" "--amp --xla --dali --data_idx_dir ${DALI_DIR}"
|
||||
fi
|
||||
|
||||
cat $INFERENCE_BENCHMARK
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --muxup=0.2 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --muxup=0.2 \
|
||||
--batch_size=64 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=64 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=64 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=64 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --muxup=0.2 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 16 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --mixup=0.2 \
|
||||
--batch_size=64 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=64 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 16 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=64 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=64 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=256 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=256 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -26,13 +26,13 @@ function run_benchmark() {
|
|||
MODE_SIZE=$2
|
||||
|
||||
if [[ $4 -eq "1" ]]; then
|
||||
XLA="--use_xla"
|
||||
XLA="--xla"
|
||||
else
|
||||
XLA=""
|
||||
fi
|
||||
|
||||
case $2 in
|
||||
"amp") MODE_FLAGS="--use_tf_amp --use_static_loss_scaling --loss_scale=128";;
|
||||
"amp") MODE_FLAGS="--amp --static_loss_scale 128";;
|
||||
"fp32"|"tf32") MODE_FLAGS="";;
|
||||
*) echo "Unsupported configuration, use amp, tf32 or fp32";;
|
||||
esac
|
||||
|
|
|
@ -39,36 +39,34 @@ __all__ = [
|
|||
|
||||
|
||||
class Runner(object):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
# ========= Model HParams ========= #
|
||||
n_classes=1001,
|
||||
architecture='resnet50',
|
||||
input_format='NHWC', # NCHW or NHWC
|
||||
compute_format='NCHW', # NCHW or NHWC
|
||||
dtype=tf.float32, # tf.float32 or tf.float16
|
||||
n_channels=3,
|
||||
height=224,
|
||||
width=224,
|
||||
distort_colors=False,
|
||||
model_dir=None,
|
||||
log_dir=None,
|
||||
data_dir=None,
|
||||
data_idx_dir=None,
|
||||
weight_init="fan_out",
|
||||
self,
|
||||
# ========= Model HParams ========= #
|
||||
n_classes=1001,
|
||||
architecture='resnet50',
|
||||
input_format='NHWC', # NCHW or NHWC
|
||||
compute_format='NCHW', # NCHW or NHWC
|
||||
dtype=tf.float32, # tf.float32 or tf.float16
|
||||
n_channels=3,
|
||||
height=224,
|
||||
width=224,
|
||||
distort_colors=False,
|
||||
model_dir=None,
|
||||
log_dir=None,
|
||||
data_dir=None,
|
||||
data_idx_dir=None,
|
||||
weight_init="fan_out",
|
||||
|
||||
# ======= Optimization HParams ======== #
|
||||
use_xla=False,
|
||||
use_tf_amp=False,
|
||||
use_dali=False,
|
||||
gpu_memory_fraction=1.0,
|
||||
gpu_id=0,
|
||||
# ======= Optimization HParams ======== #
|
||||
use_xla=False,
|
||||
use_tf_amp=False,
|
||||
use_dali=False,
|
||||
gpu_memory_fraction=1.0,
|
||||
gpu_id=0,
|
||||
|
||||
# ======== Debug Flags ======== #
|
||||
debug_verbosity=0,
|
||||
seed=None
|
||||
):
|
||||
# ======== Debug Flags ======== #
|
||||
debug_verbosity=0,
|
||||
seed=None):
|
||||
|
||||
if dtype not in [tf.float32, tf.float16]:
|
||||
raise ValueError("Unknown dtype received: %s (allowed: `tf.float32` and `tf.float16`)" % dtype)
|
||||
|
@ -123,56 +121,49 @@ class Runner(object):
|
|||
|
||||
# =================================================
|
||||
|
||||
model_hparams = tf.contrib.training.HParams(
|
||||
width=height,
|
||||
height=width,
|
||||
n_channels=n_channels,
|
||||
n_classes=n_classes,
|
||||
dtype=dtype,
|
||||
input_format=input_format,
|
||||
compute_format=compute_format,
|
||||
distort_colors=distort_colors,
|
||||
seed=tf_seed
|
||||
)
|
||||
model_hparams = tf.contrib.training.HParams(width=height,
|
||||
height=width,
|
||||
n_channels=n_channels,
|
||||
n_classes=n_classes,
|
||||
dtype=dtype,
|
||||
input_format=input_format,
|
||||
compute_format=compute_format,
|
||||
distort_colors=distort_colors,
|
||||
seed=tf_seed)
|
||||
|
||||
num_preprocessing_threads = 10 if not use_dali else 4
|
||||
run_config_performance = tf.contrib.training.HParams(
|
||||
num_preprocessing_threads=num_preprocessing_threads,
|
||||
use_tf_amp=use_tf_amp,
|
||||
use_xla=use_xla,
|
||||
use_dali=use_dali,
|
||||
gpu_memory_fraction=gpu_memory_fraction,
|
||||
gpu_id=gpu_id
|
||||
)
|
||||
run_config_performance = tf.contrib.training.HParams(num_preprocessing_threads=num_preprocessing_threads,
|
||||
use_tf_amp=use_tf_amp,
|
||||
use_xla=use_xla,
|
||||
use_dali=use_dali,
|
||||
gpu_memory_fraction=gpu_memory_fraction,
|
||||
gpu_id=gpu_id)
|
||||
|
||||
run_config_additional = tf.contrib.training.HParams(
|
||||
model_dir=model_dir if not hvd_utils.is_using_hvd() or hvd.rank() == 0 else None,
|
||||
model_dir=model_dir, #if not hvd_utils.is_using_hvd() or hvd.rank() == 0 else None,
|
||||
log_dir=log_dir if not hvd_utils.is_using_hvd() or hvd.rank() == 0 else None,
|
||||
data_dir=data_dir,
|
||||
data_idx_dir=data_idx_dir,
|
||||
num_preprocessing_threads=num_preprocessing_threads
|
||||
)
|
||||
num_preprocessing_threads=num_preprocessing_threads)
|
||||
|
||||
self.run_hparams = Runner._build_hparams(model_hparams, run_config_additional, run_config_performance)
|
||||
|
||||
model_name = architecture
|
||||
architecture = resnet.model_architectures[architecture]
|
||||
|
||||
self._model = resnet.ResnetModel(
|
||||
model_name=model_name,
|
||||
n_classes=model_hparams.n_classes,
|
||||
layers_count=architecture["layers"],
|
||||
layers_depth=architecture["widths"],
|
||||
expansions=architecture["expansions"],
|
||||
input_format=model_hparams.input_format,
|
||||
compute_format=model_hparams.compute_format,
|
||||
dtype=model_hparams.dtype,
|
||||
weight_init=weight_init,
|
||||
use_dali=use_dali,
|
||||
cardinality=architecture['cardinality'] if 'cardinality' in architecture else 1,
|
||||
use_se=architecture['use_se'] if 'use_se' in architecture else False,
|
||||
se_ratio=architecture['se_ratio'] if 'se_ratio' in architecture else 1
|
||||
)
|
||||
self._model = resnet.ResnetModel(model_name=model_name,
|
||||
n_classes=model_hparams.n_classes,
|
||||
layers_count=architecture["layers"],
|
||||
layers_depth=architecture["widths"],
|
||||
expansions=architecture["expansions"],
|
||||
input_format=model_hparams.input_format,
|
||||
compute_format=model_hparams.compute_format,
|
||||
dtype=model_hparams.dtype,
|
||||
weight_init=weight_init,
|
||||
use_dali=use_dali,
|
||||
cardinality=architecture['cardinality'] if 'cardinality' in architecture else 1,
|
||||
use_se=architecture['use_se'] if 'use_se' in architecture else False,
|
||||
se_ratio=architecture['se_ratio'] if 'se_ratio' in architecture else 1)
|
||||
|
||||
if self.run_hparams.seed is not None:
|
||||
tf.set_random_seed(self.run_hparams.seed)
|
||||
|
@ -196,9 +187,7 @@ class Runner(object):
|
|||
except ValueError:
|
||||
warnings.warn(
|
||||
"the parameter `{}` already exists - existing value: {} and duplicated value: {}".format(
|
||||
key, hparams.get(key), val
|
||||
)
|
||||
)
|
||||
key, hparams.get(key), val))
|
||||
|
||||
return hparams
|
||||
|
||||
|
@ -214,9 +203,8 @@ class Runner(object):
|
|||
def _get_session_config(mode, use_xla, use_dali, gpu_memory_fraction, gpu_id=0):
|
||||
|
||||
if mode not in ["train", 'validation', 'benchmark', 'inference']:
|
||||
raise ValueError(
|
||||
"Unknown mode received: %s (allowed: 'train', 'validation', 'benchmark', 'inference')" % mode
|
||||
)
|
||||
raise ValueError("Unknown mode received: %s (allowed: 'train', 'validation', 'benchmark', 'inference')" %
|
||||
mode)
|
||||
|
||||
# Limit available GPU memory (tune the size)
|
||||
if use_dali:
|
||||
|
@ -240,10 +228,6 @@ class Runner(object):
|
|||
|
||||
config.gpu_options.force_gpu_compatible = True # Force pinned memory
|
||||
|
||||
# Bug - disable bn+relu fusion
|
||||
from tensorflow.core.protobuf import rewriter_config_pb2
|
||||
config.graph_options.rewrite_options.remapping = (rewriter_config_pb2.RewriterConfig.OFF)
|
||||
|
||||
if mode == 'train':
|
||||
config.intra_op_parallelism_threads = 1 # Avoid pool of Eigen threads
|
||||
config.inter_op_parallelism_threads = max(2, (multiprocessing.cpu_count() // max(hvd.size(), 8) - 2))
|
||||
|
@ -254,9 +238,8 @@ class Runner(object):
|
|||
def _get_run_config(mode, model_dir, use_xla, use_dali, gpu_memory_fraction, gpu_id=0, seed=None):
|
||||
|
||||
if mode not in ["train", 'validation', 'benchmark', 'inference']:
|
||||
raise ValueError(
|
||||
"Unknown mode received: %s (allowed: 'train', 'validation', 'benchmark', 'inference')" % mode
|
||||
)
|
||||
raise ValueError("Unknown mode received: %s (allowed: 'train', 'validation', 'benchmark', 'inference')" %
|
||||
mode)
|
||||
|
||||
if seed is not None:
|
||||
if hvd_utils.is_using_hvd():
|
||||
|
@ -272,9 +255,11 @@ class Runner(object):
|
|||
save_summary_steps=100 if mode in ['train', 'validation'] else 1e9, # disabled in benchmark mode
|
||||
save_checkpoints_steps=None,
|
||||
save_checkpoints_secs=None,
|
||||
session_config=Runner._get_session_config(
|
||||
mode=mode, use_xla=use_xla, use_dali=use_dali, gpu_memory_fraction=gpu_memory_fraction, gpu_id=gpu_id
|
||||
),
|
||||
session_config=Runner._get_session_config(mode=mode,
|
||||
use_xla=use_xla,
|
||||
use_dali=use_dali,
|
||||
gpu_memory_fraction=gpu_memory_fraction,
|
||||
gpu_id=gpu_id),
|
||||
keep_checkpoint_max=5,
|
||||
keep_checkpoint_every_n_hours=1e6, # disabled
|
||||
log_step_count_steps=1e9,
|
||||
|
@ -282,14 +267,12 @@ class Runner(object):
|
|||
device_fn=None,
|
||||
protocol=None,
|
||||
eval_distribute=None,
|
||||
experimental_distribute=None
|
||||
)
|
||||
experimental_distribute=None)
|
||||
|
||||
if mode == 'train':
|
||||
if hvd_utils.is_using_hvd():
|
||||
config = config.replace(
|
||||
save_checkpoints_steps=1000 if hvd.rank() == 0 else None, keep_checkpoint_every_n_hours=3
|
||||
)
|
||||
config = config.replace(save_checkpoints_steps=1000 if hvd.rank() == 0 else None,
|
||||
keep_checkpoint_every_n_hours=3)
|
||||
else:
|
||||
config = config.replace(save_checkpoints_steps=1000, keep_checkpoint_every_n_hours=3)
|
||||
|
||||
|
@ -298,49 +281,45 @@ class Runner(object):
|
|||
def _get_estimator(self, mode, run_params, use_xla, use_dali, gpu_memory_fraction, gpu_id=0):
|
||||
|
||||
if mode not in ["train", 'validation', 'benchmark', 'inference']:
|
||||
raise ValueError(
|
||||
"Unknown mode received: %s (allowed: 'train', 'validation', 'benchmark', 'inference')" % mode
|
||||
)
|
||||
raise ValueError("Unknown mode received: %s (allowed: 'train', 'validation', 'benchmark', 'inference')" %
|
||||
mode)
|
||||
|
||||
run_config = Runner._get_run_config(
|
||||
mode=mode,
|
||||
model_dir=self.run_hparams.model_dir,
|
||||
use_xla=use_xla,
|
||||
use_dali=use_dali,
|
||||
gpu_memory_fraction=gpu_memory_fraction,
|
||||
gpu_id=gpu_id,
|
||||
seed=self.run_hparams.seed
|
||||
)
|
||||
run_config = Runner._get_run_config(mode=mode,
|
||||
model_dir=self.run_hparams.model_dir,
|
||||
use_xla=use_xla,
|
||||
use_dali=use_dali,
|
||||
gpu_memory_fraction=gpu_memory_fraction,
|
||||
gpu_id=gpu_id,
|
||||
seed=self.run_hparams.seed)
|
||||
|
||||
return tf.estimator.Estimator(
|
||||
model_fn=self._model, model_dir=self.run_hparams.model_dir, config=run_config, params=run_params
|
||||
)
|
||||
return tf.estimator.Estimator(model_fn=self._model,
|
||||
model_dir=self.run_hparams.model_dir,
|
||||
config=run_config,
|
||||
params=run_params)
|
||||
|
||||
def train(
|
||||
self,
|
||||
iter_unit,
|
||||
num_iter,
|
||||
run_iter,
|
||||
batch_size,
|
||||
warmup_steps=50,
|
||||
weight_decay=1e-4,
|
||||
lr_init=0.1,
|
||||
lr_warmup_epochs=5,
|
||||
momentum=0.9,
|
||||
log_every_n_steps=1,
|
||||
loss_scale=256,
|
||||
label_smoothing=0.0,
|
||||
mixup=0.0,
|
||||
use_cosine_lr=False,
|
||||
use_static_loss_scaling=False,
|
||||
is_benchmark=False,
|
||||
quantize=False,
|
||||
symmetric=False,
|
||||
quant_delay=0,
|
||||
finetune_checkpoint=None,
|
||||
use_final_conv=False,
|
||||
use_qdq=False
|
||||
):
|
||||
def train(self,
|
||||
iter_unit,
|
||||
num_iter,
|
||||
run_iter,
|
||||
batch_size,
|
||||
warmup_steps=50,
|
||||
weight_decay=1e-4,
|
||||
lr_init=0.1,
|
||||
lr_warmup_epochs=5,
|
||||
momentum=0.9,
|
||||
log_every_n_steps=1,
|
||||
loss_scale=256,
|
||||
label_smoothing=0.0,
|
||||
mixup=0.0,
|
||||
use_cosine_lr=False,
|
||||
use_static_loss_scaling=False,
|
||||
is_benchmark=False,
|
||||
quantize=False,
|
||||
symmetric=False,
|
||||
quant_delay=0,
|
||||
finetune_checkpoint=None,
|
||||
use_final_conv=False,
|
||||
use_qdq=False):
|
||||
|
||||
if iter_unit not in ["epoch", "batch"]:
|
||||
raise ValueError('`iter_unit` value is unknown: %s (allowed: ["epoch", "batch"])' % iter_unit)
|
||||
|
@ -383,9 +362,8 @@ class Runner(object):
|
|||
run_iter = steps_per_epoch * run_iter if iter_unit == "epoch" else run_iter
|
||||
|
||||
if self.run_hparams.use_dali and self.run_hparams.data_idx_dir is not None:
|
||||
idx_filenames = runner_utils.parse_dali_idx_dataset(
|
||||
data_idx_dir=self.run_hparams.data_idx_dir, mode="train"
|
||||
)
|
||||
idx_filenames = runner_utils.parse_dali_idx_dataset(data_idx_dir=self.run_hparams.data_idx_dir,
|
||||
mode="train")
|
||||
|
||||
training_hooks = []
|
||||
|
||||
|
@ -447,14 +425,12 @@ class Runner(object):
|
|||
if finetune_checkpoint:
|
||||
estimator_params['finetune_checkpoint'] = finetune_checkpoint
|
||||
|
||||
image_classifier = self._get_estimator(
|
||||
mode='train',
|
||||
run_params=estimator_params,
|
||||
use_xla=self.run_hparams.use_xla,
|
||||
use_dali=self.run_hparams.use_dali,
|
||||
gpu_memory_fraction=self.run_hparams.gpu_memory_fraction,
|
||||
gpu_id=self.run_hparams.gpu_id
|
||||
)
|
||||
image_classifier = self._get_estimator(mode='train',
|
||||
run_params=estimator_params,
|
||||
use_xla=self.run_hparams.use_xla,
|
||||
use_dali=self.run_hparams.use_dali,
|
||||
gpu_memory_fraction=self.run_hparams.gpu_memory_fraction,
|
||||
gpu_id=self.run_hparams.gpu_id)
|
||||
|
||||
def training_data_fn():
|
||||
|
||||
|
@ -462,30 +438,26 @@ class Runner(object):
|
|||
if hvd.rank() == 0:
|
||||
print("Using DALI input... ")
|
||||
|
||||
return data_utils.get_dali_input_fn(
|
||||
filenames=filenames,
|
||||
idx_filenames=idx_filenames,
|
||||
batch_size=batch_size,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
training=True,
|
||||
distort_color=self.run_hparams.distort_colors,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads,
|
||||
deterministic=False if self.run_hparams.seed is None else True
|
||||
)
|
||||
return data_utils.get_dali_input_fn(filenames=filenames,
|
||||
idx_filenames=idx_filenames,
|
||||
batch_size=batch_size,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
training=True,
|
||||
distort_color=self.run_hparams.distort_colors,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads,
|
||||
deterministic=False if self.run_hparams.seed is None else True)
|
||||
|
||||
elif self.run_hparams.data_dir is not None:
|
||||
|
||||
return data_utils.get_tfrecords_input_fn(
|
||||
filenames=filenames,
|
||||
batch_size=batch_size,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
training=True,
|
||||
distort_color=self.run_hparams.distort_colors,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads,
|
||||
deterministic=False if self.run_hparams.seed is None else True
|
||||
)
|
||||
return data_utils.get_tfrecords_input_fn(filenames=filenames,
|
||||
batch_size=batch_size,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
training=True,
|
||||
distort_color=self.run_hparams.distort_colors,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads,
|
||||
deterministic=False if self.run_hparams.seed is None else True)
|
||||
|
||||
else:
|
||||
if hvd.rank() == 0:
|
||||
|
@ -555,14 +527,12 @@ class Runner(object):
|
|||
'use_qdq': use_qdq,
|
||||
'use_final_conv': use_final_conv}
|
||||
|
||||
image_classifier = self._get_estimator(
|
||||
mode='validation',
|
||||
run_params=estimator_params,
|
||||
use_xla=self.run_hparams.use_xla,
|
||||
use_dali=self.run_hparams.use_dali,
|
||||
gpu_memory_fraction=self.run_hparams.gpu_memory_fraction,
|
||||
gpu_id=self.run_hparams.gpu_id
|
||||
)
|
||||
image_classifier = self._get_estimator(mode='validation',
|
||||
run_params=estimator_params,
|
||||
use_xla=self.run_hparams.use_xla,
|
||||
use_dali=self.run_hparams.use_dali,
|
||||
gpu_memory_fraction=self.run_hparams.gpu_memory_fraction,
|
||||
gpu_id=self.run_hparams.gpu_id)
|
||||
|
||||
if self.run_hparams.data_dir is not None:
|
||||
filenames, num_samples, num_steps, num_epochs, num_decay_steps = runner_utils.parse_tfrecords_dataset(
|
||||
|
@ -579,9 +549,8 @@ class Runner(object):
|
|||
num_steps = num_iter
|
||||
|
||||
if self.run_hparams.use_dali and self.run_hparams.data_idx_dir is not None:
|
||||
idx_filenames = runner_utils.parse_dali_idx_dataset(
|
||||
data_idx_dir=self.run_hparams.data_idx_dir, mode="validation"
|
||||
)
|
||||
idx_filenames = runner_utils.parse_dali_idx_dataset(data_idx_dir=self.run_hparams.data_idx_dir,
|
||||
mode="validation")
|
||||
|
||||
eval_hooks = []
|
||||
|
||||
|
@ -603,29 +572,25 @@ class Runner(object):
|
|||
if hvd.rank() == 0:
|
||||
print("Using DALI input... ")
|
||||
|
||||
return data_utils.get_dali_input_fn(
|
||||
filenames=filenames,
|
||||
idx_filenames=idx_filenames,
|
||||
batch_size=batch_size,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
training=False,
|
||||
distort_color=self.run_hparams.distort_colors,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads,
|
||||
deterministic=False if self.run_hparams.seed is None else True
|
||||
)
|
||||
return data_utils.get_dali_input_fn(filenames=filenames,
|
||||
idx_filenames=idx_filenames,
|
||||
batch_size=batch_size,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
training=False,
|
||||
distort_color=self.run_hparams.distort_colors,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads,
|
||||
deterministic=False if self.run_hparams.seed is None else True)
|
||||
|
||||
elif self.run_hparams.data_dir is not None:
|
||||
return data_utils.get_tfrecords_input_fn(
|
||||
filenames=filenames,
|
||||
batch_size=batch_size,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
training=False,
|
||||
distort_color=self.run_hparams.distort_colors,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads,
|
||||
deterministic=False if self.run_hparams.seed is None else True
|
||||
)
|
||||
return data_utils.get_tfrecords_input_fn(filenames=filenames,
|
||||
batch_size=batch_size,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
training=False,
|
||||
distort_color=self.run_hparams.distort_colors,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads,
|
||||
deterministic=False if self.run_hparams.seed is None else True)
|
||||
|
||||
else:
|
||||
print("Using Synthetic Data ...\n")
|
||||
|
@ -651,29 +616,25 @@ class Runner(object):
|
|||
eval_latencies_q = np.quantile(eval_latencies, q=[0.9, 0.95, 0.99])
|
||||
eval_latencies_mean = np.mean(eval_latencies)
|
||||
|
||||
dllogger.log(
|
||||
data={
|
||||
'top1_accuracy': float(eval_results['top1_accuracy']),
|
||||
'top5_accuracy': float(eval_results['top5_accuracy']),
|
||||
'eval_throughput': eval_throughput,
|
||||
'eval_latency_avg': eval_latencies_mean,
|
||||
'eval_latency_p90': eval_latencies_q[0],
|
||||
'eval_latency_p95': eval_latencies_q[1],
|
||||
'eval_latency_p99': eval_latencies_q[2],
|
||||
},
|
||||
step=tuple()
|
||||
)
|
||||
dllogger.log(data={
|
||||
'top1_accuracy': float(eval_results['top1_accuracy']),
|
||||
'top5_accuracy': float(eval_results['top5_accuracy']),
|
||||
'eval_throughput': eval_throughput,
|
||||
'eval_latency_avg': eval_latencies_mean,
|
||||
'eval_latency_p90': eval_latencies_q[0],
|
||||
'eval_latency_p95': eval_latencies_q[1],
|
||||
'eval_latency_p99': eval_latencies_q[2],
|
||||
},
|
||||
step=tuple())
|
||||
|
||||
if export_dir is not None:
|
||||
dllogger.log(data={'export_dir': export_dir}, step=tuple())
|
||||
input_receiver_fn = data_utils.get_serving_input_receiver_fn(
|
||||
batch_size=None,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
num_channels=self.run_hparams.n_channels,
|
||||
data_format=self.run_hparams.input_format,
|
||||
dtype=self.run_hparams.dtype
|
||||
)
|
||||
input_receiver_fn = data_utils.get_serving_input_receiver_fn(batch_size=None,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
num_channels=self.run_hparams.n_channels,
|
||||
data_format=self.run_hparams.input_format,
|
||||
dtype=self.run_hparams.dtype)
|
||||
|
||||
image_classifier.export_savedmodel(export_dir, input_receiver_fn)
|
||||
|
||||
|
@ -684,33 +645,35 @@ class Runner(object):
|
|||
|
||||
def predict(self, to_predict, quantize=False, symmetric=False, use_qdq=False, use_final_conv=False):
|
||||
|
||||
estimator_params = {'quantize': quantize, 'symmetric': symmetric, 'use_qdq': use_qdq, 'use_final_conv': use_final_conv}
|
||||
estimator_params = {
|
||||
'quantize': quantize,
|
||||
'symmetric': symmetric,
|
||||
'use_qdq': use_qdq,
|
||||
'use_final_conv': use_final_conv
|
||||
}
|
||||
|
||||
if to_predict is not None:
|
||||
filenames = runner_utils.parse_inference_input(to_predict)
|
||||
|
||||
image_classifier = self._get_estimator(
|
||||
mode='inference',
|
||||
run_params=estimator_params,
|
||||
use_xla=self.run_hparams.use_xla,
|
||||
use_dali=self.run_hparams.use_dali,
|
||||
gpu_memory_fraction=self.run_hparams.gpu_memory_fraction
|
||||
)
|
||||
image_classifier = self._get_estimator(mode='inference',
|
||||
run_params=estimator_params,
|
||||
use_xla=self.run_hparams.use_xla,
|
||||
use_dali=self.run_hparams.use_dali,
|
||||
gpu_memory_fraction=self.run_hparams.gpu_memory_fraction)
|
||||
|
||||
inference_hooks = []
|
||||
|
||||
def inference_data_fn():
|
||||
return data_utils.get_inference_input_fn(
|
||||
filenames=filenames,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads
|
||||
)
|
||||
return data_utils.get_inference_input_fn(filenames=filenames,
|
||||
height=self.run_hparams.height,
|
||||
width=self.run_hparams.width,
|
||||
num_threads=self.run_hparams.num_preprocessing_threads)
|
||||
|
||||
try:
|
||||
inference_results = image_classifier.predict(
|
||||
input_fn=inference_data_fn, predict_keys=None, hooks=inference_hooks, yield_single_examples=True
|
||||
)
|
||||
inference_results = image_classifier.predict(input_fn=inference_data_fn,
|
||||
predict_keys=None,
|
||||
hooks=inference_hooks,
|
||||
yield_single_examples=True)
|
||||
|
||||
for result in inference_results:
|
||||
print(result['classes'], str(result['probabilities'][result['classes']]))
|
||||
|
|
|
@ -48,13 +48,13 @@ def list_filenames_in_dataset(data_dir, mode, count=True):
|
|||
|
||||
filename_pattern = os.path.join(data_dir, '%s-*' % mode)
|
||||
|
||||
file_list = sorted(tf.gfile.Glob(filename_pattern))
|
||||
file_list = sorted(tf.compat.v1.gfile.Glob(filename_pattern))
|
||||
num_samples = 0
|
||||
|
||||
if count:
|
||||
def count_records(tf_record_filename):
|
||||
count = 0
|
||||
for _ in tf.python_io.tf_record_iterator(tf_record_filename):
|
||||
for _ in tf.compat.v1.io.tf_record_iterator(tf_record_filename):
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
|
|
@ -246,16 +246,16 @@ For example, to train on DGX-1 for 90 epochs using AMP, run:
|
|||
Additionally, features like DALI data preprocessing or TensorFlow XLA can be enabled with the
|
||||
following arguments when running those scripts:
|
||||
|
||||
`bash ./se-resnext101-32x4d/training/DGX1_SE-RNxt101-32x4d_AMP_90E.sh /path/to/result /data/ --use_xla --use_dali`
|
||||
`bash ./se-resnext101-32x4d/training/DGX1_SE-RNxt101-32x4d_AMP_90E.sh /path/to/result /data/ --xla --dali`
|
||||
|
||||
7. Start validation/evaluation.
|
||||
To evaluate the validation dataset located in `/data/tfrecords`, run `main.py` with
|
||||
`--mode=evaluate`. For example:
|
||||
|
||||
`python main.py --arch=se-resnext101-32x4d --mode=evaluate --data_dir=/data/tfrecords --batch_size <batch size> --model_dir
|
||||
<model location> --results_dir <output location> [--use_xla] [--use_tf_amp]`
|
||||
<model location> --results_dir <output location> [--xla] [--amp]`
|
||||
|
||||
The optional `--use_xla` and `--use_tf_amp` flags control XLA and AMP during evaluation.
|
||||
The optional `--xla` and `--amp` flags control XLA and AMP during evaluation.
|
||||
|
||||
## Advanced
|
||||
|
||||
|
@ -294,95 +294,116 @@ The `runtime/` directory contains the following module that define the mechanics
|
|||
The script for training and evaluating the ResNext101-32x4d model has a variety of parameters that control these processes.
|
||||
|
||||
```
|
||||
usage: main.py [-h]
|
||||
[--arch {resnet50,resnext101-32x4d,se-resnext101-32x4d}]
|
||||
usage: main.py [-h] [--arch {resnet50,resnext101-32x4d,se-resnext101-32x4d}]
|
||||
[--mode {train,train_and_evaluate,evaluate,predict,training_benchmark,inference_benchmark}]
|
||||
[--data_dir DATA_DIR] [--data_idx_dir DATA_IDX_DIR]
|
||||
[--export_dir EXPORT_DIR] [--to_predict TO_PREDICT]
|
||||
[--batch_size BATCH_SIZE] [--num_iter NUM_ITER]
|
||||
[--iter_unit {epoch,batch}] [--warmup_steps WARMUP_STEPS]
|
||||
[--model_dir MODEL_DIR] [--results_dir RESULTS_DIR]
|
||||
[--log_filename LOG_FILENAME] [--display_every DISPLAY_EVERY]
|
||||
[--lr_init LR_INIT] [--lr_warmup_epochs LR_WARMUP_EPOCHS]
|
||||
[--weight_decay WEIGHT_DECAY] [--weight_init {fan_in,fan_out}]
|
||||
[--momentum MOMENTUM] [--loss_scale LOSS_SCALE]
|
||||
[--label_smoothing LABEL_SMOOTHING] [--mixup MIXUP]
|
||||
[--use_static_loss_scaling | --nouse_static_loss_scaling]
|
||||
[--use_xla | --nouse_xla] [--use_dali | --nouse_dali]
|
||||
[--use_tf_amp | --nouse_tf_amp]
|
||||
[--use_cosine_lr | --nouse_cosine_lr] [--seed SEED]
|
||||
[--export_dir EXPORT_DIR] [--to_predict TO_PREDICT]
|
||||
--batch_size BATCH_SIZE [--num_iter NUM_ITER]
|
||||
[--run_iter RUN_ITER] [--iter_unit {epoch,batch}]
|
||||
[--warmup_steps WARMUP_STEPS] [--model_dir MODEL_DIR]
|
||||
[--results_dir RESULTS_DIR] [--log_filename LOG_FILENAME]
|
||||
[--display_every DISPLAY_EVERY] [--seed SEED]
|
||||
[--gpu_memory_fraction GPU_MEMORY_FRACTION] [--gpu_id GPU_ID]
|
||||
|
||||
JoC-RN50v1.5-TF
|
||||
|
||||
optional arguments:
|
||||
-h, --help Show this help message and exit
|
||||
[--finetune_checkpoint FINETUNE_CHECKPOINT] [--use_final_conv]
|
||||
[--quant_delay QUANT_DELAY] [--quantize] [--use_qdq]
|
||||
[--symmetric] [--data_dir DATA_DIR]
|
||||
[--data_idx_dir DATA_IDX_DIR] [--dali]
|
||||
[--synthetic_data_size SYNTHETIC_DATA_SIZE] [--lr_init LR_INIT]
|
||||
[--lr_warmup_epochs LR_WARMUP_EPOCHS]
|
||||
[--weight_decay WEIGHT_DECAY] [--weight_init {fan_in,fan_out}]
|
||||
[--momentum MOMENTUM] [--label_smoothing LABEL_SMOOTHING]
|
||||
[--mixup MIXUP] [--cosine_lr] [--xla]
|
||||
[--data_format {NHWC,NCHW}] [--amp]
|
||||
[--static_loss_scale STATIC_LOSS_SCALE]
|
||||
|
||||
JoC-RN50v1.5-TF
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit.
|
||||
--arch {resnet50,resnext101-32x4d,se-resnext101-32x4d}
|
||||
Architecture of model to run (to run se-resnext-32x4d set
|
||||
--arch=se-resnext101-32x4d)
|
||||
Architecture of model to run.
|
||||
--mode {train,train_and_evaluate,evaluate,predict,training_benchmark,inference_benchmark}
|
||||
The execution mode of the script.
|
||||
--export_dir EXPORT_DIR
|
||||
Directory in which to write exported SavedModel.
|
||||
--to_predict TO_PREDICT
|
||||
Path to file or directory of files to run prediction
|
||||
on.
|
||||
--batch_size BATCH_SIZE
|
||||
Size of each minibatch per GPU.
|
||||
--num_iter NUM_ITER Number of iterations to run.
|
||||
--run_iter RUN_ITER Number of training iterations to run on single run.
|
||||
--iter_unit {epoch,batch}
|
||||
Unit of iterations.
|
||||
--warmup_steps WARMUP_STEPS
|
||||
Number of steps considered as warmup and not taken
|
||||
into account for performance measurements.
|
||||
--model_dir MODEL_DIR
|
||||
Directory in which to write model. If undefined,
|
||||
results dir will be used.
|
||||
--results_dir RESULTS_DIR
|
||||
Directory in which to write training logs, summaries
|
||||
and checkpoints.
|
||||
--log_filename LOG_FILENAME
|
||||
Name of the JSON file to which to write the training log.
|
||||
--display_every DISPLAY_EVERY
|
||||
How often (in batches) to print out running
|
||||
information.
|
||||
--seed SEED Random seed.
|
||||
--gpu_memory_fraction GPU_MEMORY_FRACTION
|
||||
Limit memory fraction used by training script for DALI.
|
||||
--gpu_id GPU_ID Specify ID of the target GPU on multi-device platform.
|
||||
Effective only for single-GPU mode.
|
||||
--finetune_checkpoint FINETUNE_CHECKPOINT
|
||||
Path to pre-trained checkpoint which will be used for
|
||||
fine-tuning.
|
||||
--use_final_conv Use convolution operator instead of MLP as last layer.
|
||||
--quant_delay QUANT_DELAY
|
||||
Number of steps to be run before quantization starts
|
||||
to happen.
|
||||
--quantize Quantize weights and activations during training.
|
||||
(Defaults to Asymmetric quantization)
|
||||
--use_qdq Use QDQV3 op instead of FakeQuantWithMinMaxVars op for
|
||||
quantization. QDQv3 does only scaling.
|
||||
--symmetric Quantize weights and activations during training using
|
||||
symmetric quantization.
|
||||
|
||||
Dataset arguments:
|
||||
--data_dir DATA_DIR Path to dataset in TFRecord format. Files should be
|
||||
named 'train-*' and 'validation-*'.
|
||||
--data_idx_dir DATA_IDX_DIR
|
||||
Path to index files for DALI. Files should be named
|
||||
'train-*' and 'validation-*'.
|
||||
--export_dir EXPORT_DIR
|
||||
Directory in which to write exported SavedModel.
|
||||
--to_predict TO_PREDICT
|
||||
Path to file or directory of files to run prediction
|
||||
on.
|
||||
--batch_size BATCH_SIZE
|
||||
Size of each minibatch per GPU.
|
||||
--num_iter NUM_ITER Number of iterations to run.
|
||||
--iter_unit {epoch,batch}
|
||||
Unit of iterations.
|
||||
--warmup_steps WARMUP_STEPS
|
||||
Number of steps considered as warmup and not taken
|
||||
into account for performance measurements.
|
||||
--model_dir MODEL_DIR
|
||||
Directory in which to write the model. If undefined,
|
||||
results directory will be used.
|
||||
--results_dir RESULTS_DIR
|
||||
Directory in which to write training logs, summaries
|
||||
and checkpoints.
|
||||
--log_filename LOG_FILENAME
|
||||
Name of the JSON file to which to write the training log
|
||||
--display_every DISPLAY_EVERY
|
||||
How often (in batches) to print out running
|
||||
information.
|
||||
--dali Enable DALI data input.
|
||||
--synthetic_data_size SYNTHETIC_DATA_SIZE
|
||||
Dimension of image for synthetic dataset.
|
||||
|
||||
Training arguments:
|
||||
--lr_init LR_INIT Initial value for the learning rate.
|
||||
--lr_warmup_epochs LR_WARMUP_EPOCHS
|
||||
Number of warmup epochs for the learning rate schedule.
|
||||
Number of warmup epochs for learning rate schedule.
|
||||
--weight_decay WEIGHT_DECAY
|
||||
Weight Decay scale factor.
|
||||
--weight_init {fan_in,fan_out}
|
||||
Model weight initialization method.
|
||||
--momentum MOMENTUM SGD momentum value for the momentum optimizer.
|
||||
--loss_scale LOSS_SCALE
|
||||
Loss scale for FP16 training and fast math FP32.
|
||||
--momentum MOMENTUM SGD momentum value for the Momentum optimizer.
|
||||
--label_smoothing LABEL_SMOOTHING
|
||||
The value of label smoothing.
|
||||
--mixup MIXUP The alpha parameter for mixup (if 0 then mixup is not
|
||||
applied).
|
||||
--use_static_loss_scaling
|
||||
Use static loss scaling in FP16 or FP32 AMP.
|
||||
--nouse_static_loss_scaling
|
||||
--use_xla Enable XLA (Accelerated Linear Algebra) computation
|
||||
--cosine_lr Use cosine learning rate schedule.
|
||||
|
||||
Generic optimization arguments:
|
||||
--xla Enable XLA (Accelerated Linear Algebra) computation
|
||||
for improved performance.
|
||||
--nouse_xla
|
||||
--use_dali Enable DALI data input.
|
||||
--nouse_dali
|
||||
--use_tf_amp Enable AMP to speedup FP32
|
||||
computation using Tensor Cores.
|
||||
--nouse_tf_amp
|
||||
--use_cosine_lr Use cosine learning rate schedule.
|
||||
--nouse_cosine_lr
|
||||
--seed SEED Random seed.
|
||||
--gpu_memory_fraction GPU_MEMORY_FRACTION
|
||||
Limit memory fraction used by the training script for DALI
|
||||
--gpu_id GPU_ID Specify the ID of the target GPU on a multi-device platform.
|
||||
Effective only for single-GPU mode.
|
||||
--data_format {NHWC,NCHW}
|
||||
Data format used to do calculations.
|
||||
--amp Enable Automatic Mixed Precision to speedup
|
||||
computation using tensor cores.
|
||||
|
||||
Automatic Mixed Precision arguments:
|
||||
--static_loss_scale STATIC_LOSS_SCALE
|
||||
Use static loss scaling in FP32 AMP.
|
||||
|
||||
```
|
||||
|
||||
### Inference process
|
||||
|
@ -390,7 +411,7 @@ To run inference on a single example with a checkpoint and a model script, use:
|
|||
|
||||
`python main.py --arch=se-resnext101-32x4d --mode predict --model_dir <path to model> --to_predict <path to image> --results_dir <path to results>`
|
||||
|
||||
The optional `--use_xla` and `--use_tf_amp` flags control XLA and AMP during inference.
|
||||
The optional `--xla` and `--amp` flags control XLA and AMP during inference.
|
||||
|
||||
## Performance
|
||||
|
||||
|
@ -409,7 +430,7 @@ To benchmark the training performance on a specific batch size, run:
|
|||
|
||||
* AMP
|
||||
|
||||
`python ./main.py --arch=se-resnext101-32x4d --mode=training_benchmark --use_tf_amp --warmup_steps 200 --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
`python ./main.py --arch=se-resnext101-32x4d --mode=training_benchmark --amp --warmup_steps 200 --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
|
||||
* For multiple GPUs
|
||||
* FP32 / TF32
|
||||
|
@ -418,16 +439,17 @@ To benchmark the training performance on a specific batch size, run:
|
|||
|
||||
* AMP
|
||||
|
||||
`mpiexec --allow-run-as-root --bind-to socket -np <num_gpus> python ./main.py --arch=se-resnext101-32x4d --mode=training_benchmark --use_tf_amp --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
`mpiexec --allow-run-as-root --bind-to socket -np <num_gpus> python ./main.py --arch=se-resnext101-32x4d --mode=training_benchmark --amp --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
|
||||
|
||||
Each of these scripts runs 200 warm-up iterations and measures the first epoch.
|
||||
|
||||
To control warmup and benchmark length, use the `--warmup_steps`, `--num_iter` and `--iter_unit` flags. Features like XLA or DALI can be controlled
|
||||
with `--use_xla` and `--use_dali` flags. If no `--data_dir=<path to imagenet>` flag is specified then the benchmarks will use a synthetic dataset.
|
||||
For proper throughput reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
with `--xla` and `--dali` flags. For proper throughput reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
Suggested batch sizes for training are 96 for mixed precision training and 64 for single precision training per single V100 16 GB.
|
||||
|
||||
If no `--data_dir=<path to imagenet>` flag is specified then the benchmarks will use a synthetic dataset. The resolution of synthetic images used can be controlled with `--synthetic_data_size` flag.
|
||||
|
||||
|
||||
#### Inference performance benchmark
|
||||
|
||||
|
@ -439,11 +461,10 @@ To benchmark the inference performance on a specific batch size, run:
|
|||
|
||||
* AMP
|
||||
|
||||
`python ./main.py --arch=se-resnext101-32x4d --mode=inference_benchmark --use_tf_amp --warmup_steps 20 --num_iter 100 --iter_unit batch --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
`python ./main.py --arch=se-resnext101-32x4d --mode=inference_benchmark --amp --warmup_steps 20 --num_iter 100 --iter_unit batch --batch_size <batch size> --data_dir=<path to imagenet> --results_dir=<path to results directory>`
|
||||
|
||||
By default, each of these scripts runs 20 warm-up iterations and measures the next 80 iterations.
|
||||
To control warm-up and benchmark length, use the `--warmup_steps`, `--num_iter` and `--iter_unit` flags.
|
||||
For proper throughput and latency reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
If no `--data_dir=<path to imagenet>` flag is specified then the benchmarks will use a synthetic dataset.
|
||||
|
||||
The benchmark can be automated with the `inference_benchmark.sh` script provided in `se-resnext101-32x4d`, by simply running:
|
||||
|
@ -452,6 +473,9 @@ The benchmark can be automated with the `inference_benchmark.sh` script provided
|
|||
The `<data dir>` parameter refers to the input data directory (by default `/data/tfrecords` inside the container).
|
||||
By default, the benchmark tests the following configurations: **FP32**, **AMP**, **AMP + XLA** with different batch sizes.
|
||||
When the optional directory with the DALI index files `<data idx dir>` is specified, the benchmark executes an additional **DALI + AMP + XLA** configuration.
|
||||
For proper throughput reporting the value of `--num_iter` must be greater than `--warmup_steps` value.
|
||||
|
||||
For a performance benchmark of the raw model, a synthetic dataset can be used. To use a synthetic dataset, use the `--synthetic_data_size` flag instead of `--data_dir` to specify the input image size.
|
||||
|
||||
### Results
|
||||
|
||||
|
@ -761,6 +785,9 @@ on NVIDIA T4 with (1x T4 16G) GPU.
|
|||
|
||||
April 2020
|
||||
- Initial release
|
||||
August 2020
|
||||
- Updated command line argument names
|
||||
- Added support for synthetic dataset with different image size
|
||||
|
||||
### Known issues
|
||||
Performance without XLA enabled is low. We recommend using XLA.
|
||||
Performance without XLA enabled is low due to BN + ReLU fusion bug.
|
||||
|
|
|
@ -22,12 +22,12 @@ function test_configuration() {
|
|||
}
|
||||
|
||||
test_configuration "FP32 nodali noxla"
|
||||
test_configuration "FP32 nodali xla" "--use_xla"
|
||||
test_configuration "FP16 nodali noxla" "--use_tf_amp"
|
||||
test_configuration "FP16 nodali xla" "--use_tf_amp --use_xla"
|
||||
test_configuration "FP32 nodali xla" "--xla"
|
||||
test_configuration "FP16 nodali noxla" "--amp"
|
||||
test_configuration "FP16 nodali xla" "--amp --xla"
|
||||
|
||||
if [ ! -z $DALI_DIR ]; then
|
||||
test_configuration "FP16 dali xla" "--use_tf_amp --use_xla --use_dali --data_idx_dir ${DALI_DIR}"
|
||||
test_configuration "FP16 dali xla" "--amp --xla --dali --data_idx_dir ${DALI_DIR}"
|
||||
fi
|
||||
|
||||
cat $INFERENCE_BENCHMARK
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=se-resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --mixup=0.2 \
|
||||
--batch_size=96 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=96 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=se-resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=96 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=96 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=se-resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --mixup=0.2 \
|
||||
--batch_size=64 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=64 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=se-resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=64 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=64 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --mixup=0.2 \
|
||||
--batch_size=96 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=96 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 16 python3 main.py --arch=se-resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=96 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=96 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp --use_static_loss_scaling --loss_scale 128 \
|
||||
--amp --static_loss_scale 128 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=se-resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=250 --mixup=0.2 \
|
||||
--batch_size=64 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=64 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 16 python3 main.py --arch=se-resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=64 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=64 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -25,9 +25,9 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=se-resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=256 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=256 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--use_tf_amp \
|
||||
--amp \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ fi
|
|||
|
||||
mpiexec --allow-run-as-root ${BIND_TO_SOCKET} -np 8 python3 main.py --arch=se-resnext101-32x4d \
|
||||
--mode=train_and_evaluate --iter_unit=epoch --num_iter=90 \
|
||||
--batch_size=128 --warmup_steps=100 --use_cosine --label_smoothing 0.1 \
|
||||
--batch_size=128 --warmup_steps=100 --cosine_lr --label_smoothing 0.1 \
|
||||
--lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=6.103515625e-05 \
|
||||
--data_dir=${DATA_DIR}/tfrecords --data_idx_dir=${DATA_DIR}/dali_idx \
|
||||
--results_dir=${WORKSPACE}/results --weight_init=fan_in ${OTHER}
|
||||
|
|
|
@ -26,13 +26,13 @@ function run_benchmark() {
|
|||
MODE_SIZE=$2
|
||||
|
||||
if [[ $4 -eq "1" ]]; then
|
||||
XLA="--use_xla"
|
||||
XLA="--xla"
|
||||
else
|
||||
XLA=""
|
||||
fi
|
||||
|
||||
case $2 in
|
||||
"amp") MODE_FLAGS="--use_tf_amp --use_static_loss_scaling --loss_scale=128";;
|
||||
"amp") MODE_FLAGS="--amp --static_loss_scale=128";;
|
||||
"fp32"|"tf32") MODE_FLAGS="";;
|
||||
*) echo "Unsupported configuration, use amp, tf32 or fp32";;
|
||||
esac
|
||||
|
|
687
TensorFlow/Classification/ConvNets/triton/README.md
Normal file
|
@ -0,0 +1,687 @@
|
|||
# Deploying the ResNet-50 v1.5 model on Triton Inference Server
|
||||
|
||||
This folder contains instructions for deployment to run inference
|
||||
on Triton Inference Server as well as a detailed performance analysis.
|
||||
The purpose of this document is to help you with achieving
|
||||
the best inference performance.
|
||||
|
||||
## Table of contents
|
||||
|
||||
- [Solution overview](#solution-overview)
|
||||
- [Introduction](#introduction)
|
||||
- [Deployment process](#deployment-process)
|
||||
- [Setup](#setup)
|
||||
- [Quick Start Guide](#quick-start-guide)
|
||||
- [Advanced](#advanced)
|
||||
- [Prepare configuration](#prepare-configuration)
|
||||
- [Latency explanation](#latency-explanation)
|
||||
- [Performance](#performance)
|
||||
- [Offline scenario](#offline-scenario)
|
||||
- [Offline: NVIDIA A40, TF-TRT with FP16](#offline-nvidia-a40-tf-trt-with-fp16)
|
||||
- [Offline: NVIDIA DGX A100 (1x A100 80GB), TF-TRT with FP16](#offline-nvidia-dgx-a100-1x-a100-80gb-tf-trt-with-fp16)
|
||||
- [Offline: NVIDIA DGX-1 (1x V100 32GB), TF-TRT with FP16](#offline-nvidia-dgx-1-1x-v100-32gb-tf-trt-with-fp16)
|
||||
- [Offline: NVIDIA T4, TF-TRT with FP16](#offline-nvidia-t4-tf-trt-with-fp16)
|
||||
- [Online scenario](#online-scenario)
|
||||
- [Online: NVIDIA A40, TF-TRT with FP16](#online-nvidia-a40-tf-trt-with-fp16)
|
||||
- [Online: NVIDIA DGX A100 (1x A100 80GB), TF-TRT with FP16](#online-nvidia-dgx-a100-1x-a100-80gb-tf-trt-with-fp16)
|
||||
- [Online: NVIDIA DGX-1 (1x V100 32GB), TF-TRT with FP16](#online-nvidia-dgx-1-1x-v100-32gb-tf-trt-with-fp16)
|
||||
- [Online: NVIDIA T4, TF-TRT with FP16](#online-nvidia-t4-tf-trt-with-fp16)
|
||||
- [Release Notes](#release-notes)
|
||||
- [Changelog](#changelog)
|
||||
- [Known issues](#known-issues)
|
||||
|
||||
|
||||
|
||||
|
||||
## Solution overview
|
||||
|
||||
|
||||
### Introduction
|
||||
The [NVIDIA Triton Inference Server](https://github.com/NVIDIA/triton-inference-server)
|
||||
provides a datacenter and cloud inferencing solution optimized for NVIDIA GPUs.
|
||||
The server provides an inference service via an HTTP or gRPC endpoint,
|
||||
allowing remote clients to request inferencing for any number of GPU
|
||||
or CPU models being managed by the server.
|
||||
|
||||
This README provides step-by-step deployment instructions for models generated
|
||||
during training (as described in the [model README](../README.md)).
|
||||
Additionally, this README provides the corresponding deployment scripts that
|
||||
ensure optimal GPU utilization during inferencing on Triton Inference Server.
|
||||
|
||||
### Deployment process
|
||||
The deployment process consists of two steps:
|
||||
|
||||
1. Conversion. The purpose of conversion is to find the best performing model
|
||||
format supported by Triton Inference Server.
|
||||
Triton Inference Server uses a number of runtime backends such as
|
||||
[TensorRT](https://developer.nvidia.com/tensorrt),
|
||||
[TensorFlow](https://github.com/triton-inference-server/tensorflow_backend) and
|
||||
[ONNX Runtime](https://github.com/triton-inference-server/onnxruntime_backend)
|
||||
to support various model types. Refer to
|
||||
[Triton documentation](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton)
|
||||
for a list of available backends.
|
||||
2. Configuration. Model configuration on Triton Inference Server, which generates
|
||||
necessary [configuration files](https://github.com/triton-inference-server/server/blob/master/docs/model_configuration.md).
|
||||
|
||||
To run benchmarks measuring the model performance in inference,
|
||||
perform the following steps:
|
||||
|
||||
1. Start the Triton Inference Server.
|
||||
|
||||
The Triton Inference Server container is started
|
||||
in one (possibly remote) container and ports for gRPC or REST API are exposed.
|
||||
|
||||
2. Run accuracy tests.
|
||||
|
||||
Produce results which are tested against given accuracy thresholds.
|
||||
Refer to step 8 in the [Quick Start Guide](#quick-start-guide).
|
||||
|
||||
3. Run performance tests.
|
||||
|
||||
Produce latency and throughput results for offline (static batching)
|
||||
and online (dynamic batching) scenarios.
|
||||
Refer to steps 11 and 12 in the [Quick Start Guide](#quick-start-guide).
|
||||
|
||||
|
||||
## Setup
|
||||
|
||||
|
||||
|
||||
Ensure you have the following components:
|
||||
* [NVIDIA Docker](https://github.com/NVIDIA/nvidia-docker)
|
||||
* [TensorFlow1 NGC container 20.12](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
|
||||
* [Triton Inference Server NGC container 20.12](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver)
|
||||
* [NVIDIA CUDA repository](https://docs.nvidia.com/cuda/archive/11.1.1/index.html)
|
||||
* [NVIDIA Ampere](https://www.nvidia.com/en-us/data-center/nvidia-ampere-gpu-architecture/), [Volta](https://www.nvidia.com/en-us/data-center/volta-gpu-architecture/) or [Turing](https://www.nvidia.com/en-us/geforce/turing/) based GPU
|
||||
|
||||
|
||||
|
||||
## Quick Start Guide
|
||||
Running the following scripts will build and launch the container with all
|
||||
required dependencies for native TensorFlow as well as Triton Inference Server.
|
||||
This is necessary for running inference and can also be used for data download,
|
||||
processing, and training of the model.
|
||||
|
||||
1. Clone the repository.
|
||||
IMPORTANT: This step is executed on the host computer.
|
||||
|
||||
```
|
||||
git clone https://github.com/NVIDIA/DeepLearningExamples.git
|
||||
cd DeepLearningExamples/TensorFlow/Classification/ConvNets
|
||||
```
|
||||
2. Set up the environment on the host PC and start Triton Inference Server.
|
||||
|
||||
```
|
||||
source triton/scripts/setup_environment.sh
|
||||
bash triton/scripts/docker/triton_inference_server.sh
|
||||
```
|
||||
|
||||
3. Build and run a container that extends the NGC TensorFlow container with
|
||||
the Triton Inference Server client libraries and dependencies.
|
||||
|
||||
```
|
||||
bash triton/scripts/docker/build.sh
|
||||
bash triton/scripts/docker/interactive.sh
|
||||
```
|
||||
|
||||
|
||||
4. Prepare the deployment configuration and create folders in Docker.
|
||||
|
||||
IMPORTANT: These and the following commands must be executed in the TensorFlow NGC container.
|
||||
|
||||
|
||||
```
|
||||
source triton/scripts/setup_environment.sh
|
||||
```
|
||||
|
||||
5. Download and pre-process the dataset.
|
||||
|
||||
|
||||
```
|
||||
bash triton/scripts/download_data.sh
|
||||
bash triton/scripts/process_dataset.sh
|
||||
```
|
||||
|
||||
6. Setup the parameters for deployment.
|
||||
|
||||
```
|
||||
source triton/scripts/setup_parameters.sh
|
||||
```
|
||||
|
||||
7. Convert the model from training to inference format (e.g. TensorRT).
|
||||
|
||||
```
|
||||
python3 triton/convert_model.py \
|
||||
--input-path triton/rn50_model.py \
|
||||
--input-type tf-estimator \
|
||||
--output-path ${SHARED_DIR}/model \
|
||||
--output-type ${FORMAT} \
|
||||
--onnx-opset 12 \
|
||||
--onnx-optimized 1 \
|
||||
--max-batch-size ${MAX_BATCH_SIZE} \
|
||||
--max-workspace-size 4294967296 \
|
||||
--ignore-unknown-parameters \
|
||||
\
|
||||
--model-dir ${CHECKPOINT_DIR} \
|
||||
--precision ${PRECISION} \
|
||||
--dataloader triton/dataloader.py \
|
||||
--data-dir ${DATASETS_DIR}/imagenet
|
||||
```
|
||||
|
||||
8. Run the model accuracy tests in framework.
|
||||
|
||||
```
|
||||
python3 triton/run_inference_on_fw.py \
|
||||
--input-path ${SHARED_DIR}/model \
|
||||
--input-type ${FORMAT} \
|
||||
--dataloader triton/dataloader.py \
|
||||
--data-dir ${DATASETS_DIR}/imagenet \
|
||||
--images-num 256 \
|
||||
--batch-size ${MAX_BATCH_SIZE} \
|
||||
--output-dir ${SHARED_DIR}/correctness_dump \
|
||||
--dump-labels
|
||||
|
||||
python3 triton/calculate_metrics.py \
|
||||
--dump-dir ${SHARED_DIR}/correctness_dump \
|
||||
--metrics triton/metrics.py \
|
||||
--output-used-for-metrics classes \
|
||||
--csv ${SHARED_DIR}/correctness_metrics.csv
|
||||
|
||||
cat ${SHARED_DIR}/correctness_metrics.csv
|
||||
|
||||
```
|
||||
|
||||
9. Configure the model on Triton Inference Server.
|
||||
|
||||
Generate the configuration from your model repository.
|
||||
|
||||
```
|
||||
python3 triton/config_model_on_triton.py \
|
||||
--model-repository ${MODEL_REPOSITORY_PATH} \
|
||||
--model-path ${SHARED_DIR}/model \
|
||||
--model-format ${FORMAT} \
|
||||
--model-name ${MODEL_NAME} \
|
||||
--model-version 1 \
|
||||
--max-batch-size ${MAX_BATCH_SIZE} \
|
||||
--precision ${PRECISION} \
|
||||
--number-of-model-instances ${NUMBER_OF_MODEL_INSTANCES} \
|
||||
--max-queue-delay-us ${TRITON_MAX_QUEUE_DELAY} \
|
||||
--preferred-batch-sizes ${TRITON_PREFERRED_BATCH_SIZES} \
|
||||
--capture-cuda-graph 0 \
|
||||
--backend-accelerator ${BACKEND_ACCELERATOR} \
|
||||
--load-model ${TRITON_LOAD_MODEL_METHOD}
|
||||
```
|
||||
|
||||
10. Run the Triton Inference Server accuracy tests.
|
||||
|
||||
```
|
||||
python3 triton/run_inference_on_triton.py \
|
||||
--server-url localhost:8001 \
|
||||
--model-name ${MODEL_NAME} \
|
||||
--model-version 1 \
|
||||
--dataloader triton/dataloader.py \
|
||||
--data-dir ${DATASETS_DIR}/imagenet \
|
||||
--batch-size ${MAX_BATCH_SIZE} \
|
||||
--output-dir ${SHARED_DIR}/accuracy_dump \
|
||||
--dump-labels
|
||||
|
||||
python3 triton/calculate_metrics.py \
|
||||
--dump-dir ${SHARED_DIR}/accuracy_dump \
|
||||
--metrics triton/metrics.py \
|
||||
--output-used-for-metrics classes \
|
||||
--csv ${SHARED_DIR}/accuracy_metrics.csv
|
||||
|
||||
cat ${SHARED_DIR}/accuracy_metrics.csv
|
||||
```
|
||||
|
||||
|
||||
11. Run the Triton Inference Server performance offline tests.
|
||||
|
||||
We want to maximize throughput. It assumes you have your data available
|
||||
for inference or that your data saturate to maximum batch size quickly.
|
||||
Triton Inference Server supports offline scenarios with static batching.
|
||||
Static batching allows inference requests to be served
|
||||
as they are received. The largest improvements to throughput come
|
||||
from increasing the batch size due to efficiency gains in the GPU with larger
|
||||
batches.
|
||||
|
||||
|
||||
```
|
||||
python triton/run_offline_performance_test_on_triton.py \
|
||||
--server-url ${TRITON_SERVER_URL} \
|
||||
--model-name ${MODEL_NAME} \
|
||||
--input-data random \
|
||||
--batch-sizes ${BATCH_SIZE} \
|
||||
--triton-instances ${TRITON_INSTANCES} \
|
||||
--result-path ${SHARED_DIR}/triton_performance_offline.csv
|
||||
```
|
||||
|
||||
|
||||
12. Run the Triton Inference Server performance online tests.
|
||||
|
||||
We want to maximize throughput within latency budget constraints.
|
||||
Dynamic batching is a feature of Triton Inference Server that allows
|
||||
inference requests to be combined by the server, so that a batch is
|
||||
created dynamically, resulting in a reduced average latency.
|
||||
You can set the Dynamic Batcher parameter `max_queue_delay_microseconds` to
|
||||
indicate the maximum amount of time you are willing to wait and
|
||||
`preferred_batch_size` to indicate your maximum server batch size
|
||||
in the Triton Inference Server model configuration. The measurements
|
||||
presented below set the maximum latency to zero to achieve the best latency
|
||||
possible with good performance.
|
||||
|
||||
```
|
||||
python triton/run_online_performance_test_on_triton.py \
|
||||
--server-url ${TRITON_SERVER_URL} \
|
||||
--model-name ${MODEL_NAME} \
|
||||
--input-data random \
|
||||
--batch-sizes ${BATCH_SIZE} \
|
||||
--triton-instances ${TRITON_INSTANCES} \
|
||||
--number-of-model-instances ${NUMBER_OF_MODEL_INSTANCES} \
|
||||
--result-path ${SHARED_DIR}/triton_performance_online.csv
|
||||
|
||||
```
|
||||
|
||||
|
||||
## Advanced
|
||||
|
||||
|
||||
### Prepare configuration
|
||||
You can use the environment variables to set the parameters of your inference
|
||||
configuration.
|
||||
|
||||
Triton deployment scripts support several inference runtimes listed in the table below:
|
||||
| Inference runtime | Mnemonic used in scripts |
|
||||
|--------------------|--------------------------|
|
||||
| [TensorFlow SavedModel](https://www.tensorflow.org/guide/saved_model) | `tf-savedmodel` |
|
||||
| [TensorFlow TensorRT](https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html) | `tf-trt` |
|
||||
| [ONNX](https://onnx.ai) | `onnx` |
|
||||
| [NVIDIA TensorRT](https://developer.nvidia.com/tensorrt) | `trt` |
|
||||
|
||||
The name of the inference runtime should be put into the `FORMAT` variable.
|
||||
|
||||
|
||||
|
||||
Example values of some key variables in one configuration:
|
||||
```
|
||||
PRECISION="fp16"
|
||||
FORMAT="tf-trt"
|
||||
BATCH_SIZE="1, 2, 4, 8, 16, 32, 64, 128"
|
||||
BACKEND_ACCELERATOR="trt"
|
||||
MAX_BATCH_SIZE="128"
|
||||
NUMBER_OF_MODEL_INSTANCES="2"
|
||||
TRITON_MAX_QUEUE_DELAY="1"
|
||||
TRITON_PREFERRED_BATCH_SIZES="64 128"
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Latency explanation
|
||||
A typical Triton Inference Server pipeline can be broken down into the following steps:
|
||||
|
||||
1. The client serializes the inference request into a message and sends it to
|
||||
the server (Client Send).
|
||||
2. The message travels over the network from the client to the server (Network).
|
||||
3. The message arrives at the server and is deserialized (Server Receive).
|
||||
4. The request is placed on the queue (Server Queue).
|
||||
5. The request is removed from the queue and computed (Server Compute).
|
||||
6. The completed request is serialized in a message and sent back to
|
||||
the client (Server Send).
|
||||
7. The completed message then travels over the network from the server
|
||||
to the client (Network).
|
||||
8. The completed message is deserialized by the client and processed as
|
||||
a completed inference request (Client Receive).
|
||||
|
||||
Generally, for local clients, steps 1-4 and 6-8 will only occupy
|
||||
a small fraction of time, compared to step 5. As backend deep learning
|
||||
systems like ResNet-50 are rarely exposed directly to end users, but instead
|
||||
only interface with local front-end servers, for the sake of ResNet-50,
|
||||
we can consider that all clients are local.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Performance
|
||||
|
||||
|
||||
### Offline scenario
|
||||
This table lists the common variable parameters for all performance measurements:
|
||||
| Parameter Name | Parameter Value |
|
||||
|:-----------------------------|:------------------|
|
||||
| Max Batch Size | 128.0 |
|
||||
| Number of model instances | 2.0 |
|
||||
| Triton Max Queue Delay | 1.0 |
|
||||
| Triton Preferred Batch Sizes | 64 128 |
|
||||
|
||||
|
||||
#### Offline: NVIDIA A40, TF-TRT with FP16
|
||||
|
||||
Our results were obtained using the following configuration:
|
||||
* **GPU:** NVIDIA A40
|
||||
* **Backend:** TensorFlow
|
||||
* **Model binding:** TF-TRT
|
||||
* **Precision:** FP16
|
||||
* **Model format:** TensorFlow SavedModel
|
||||
|
||||
|![](plots/graph_performance_offline_3l.svg)|![](plots/graph_performance_offline_3r.svg)|
|
||||
|-----|-----|
|
||||
|
||||
<details>
|
||||
|
||||
<summary>
|
||||
Full tabular data
|
||||
</summary>
|
||||
|
||||
| Precision | Backend Accelerator | Client Batch Size | Inferences/second | P90 Latency | P95 Latency | P99 Latency | Avg Latency |
|
||||
|:------------|:---------------------|--------------------:|--------------------:|--------------:|--------------:|--------------:|--------------:|
|
||||
| FP16 | TensorRT | 1 | 329.5 | 3.23 | 3.43 | 3.973 | 3.031 |
|
||||
| FP16 | TensorRT | 2 | 513.8 | 4.292 | 4.412 | 4.625 | 3.888 |
|
||||
| FP16 | TensorRT | 4 | 720.8 | 6.122 | 6.264 | 6.5 | 5.543 |
|
||||
| FP16 | TensorRT | 8 | 919.2 | 9.145 | 9.664 | 10.3 | 8.701 |
|
||||
| FP16 | TensorRT | 16 | 1000 | 17.522 | 17.979 | 19.098 | 16.01 |
|
||||
| FP16 | TensorRT | 32 | 889.6 | 37.49 | 38.481 | 40.316 | 35.946 |
|
||||
| FP16 | TensorRT | 64 | 992 | 66.837 | 67.923 | 70.324 | 64.645 |
|
||||
| FP16 | TensorRT | 128 | 896 | 148.461 | 149.854 | 150.05 | 143.684 |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
#### Offline: NVIDIA DGX A100 (1x A100 80GB), TF-TRT with FP16
|
||||
|
||||
Our results were obtained using the following configuration:
|
||||
* **GPU:** NVIDIA DGX A100 (1x A100 80GB)
|
||||
* **Backend:** TensorFlow
|
||||
* **Model binding:** TF-TRT
|
||||
* **Precision:** FP16
|
||||
* **Model format:** TensorFlow SavedModel
|
||||
|
||||
|![](plots/graph_performance_offline_7l.svg)|![](plots/graph_performance_offline_7r.svg)|
|
||||
|-----|-----|
|
||||
|
||||
<details>
|
||||
|
||||
<summary>
|
||||
Full tabular data
|
||||
</summary>
|
||||
|
||||
| Precision | Backend Accelerator | Client Batch Size | Inferences/second | P90 Latency | P95 Latency | P99 Latency | Avg Latency |
|
||||
|:------------|:---------------------|--------------------:|--------------------:|--------------:|--------------:|--------------:|--------------:|
|
||||
| FP16 | TensorRT | 1 | 387.9 | 2.626 | 2.784 | 2.875 | 2.574 |
|
||||
| FP16 | TensorRT | 2 | 637.2 | 3.454 | 3.506 | 3.547 | 3.135 |
|
||||
| FP16 | TensorRT | 4 | 982.4 | 4.328 | 4.454 | 4.627 | 4.07 |
|
||||
| FP16 | TensorRT | 8 | 1181.6 | 7.012 | 7.074 | 7.133 | 6.765 |
|
||||
| FP16 | TensorRT | 16 | 1446.4 | 11.162 | 11.431 | 11.941 | 11.061 |
|
||||
| FP16 | TensorRT | 32 | 1353.6 | 24.392 | 24.914 | 25.178 | 23.603 |
|
||||
| FP16 | TensorRT | 64 | 1478.4 | 45.539 | 46.096 | 47.546 | 43.401 |
|
||||
| FP16 | TensorRT | 128 | 1331.2 | 97.504 | 100.611 | 101.896 | 96.198 |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
#### Offline: NVIDIA DGX-1 (1x V100 32GB), TF-TRT with FP16
|
||||
|
||||
Our results were obtained using the following configuration:
|
||||
* **GPU:** NVIDIA DGX-1 (1x V100 32GB)
|
||||
* **Backend:** TensorFlow
|
||||
* **Model binding:** TF-TRT
|
||||
* **Precision:** FP16
|
||||
* **Model format:** TensorFlow SavedModel
|
||||
|
||||
|![](plots/graph_performance_offline_11l.svg)|![](plots/graph_performance_offline_11r.svg)|
|
||||
|-----|-----|
|
||||
|
||||
<details>
|
||||
|
||||
<summary>
|
||||
Full tabular data
|
||||
</summary>
|
||||
|
||||
| Precision | Backend Accelerator | Client Batch Size | Inferences/second | P90 Latency | P95 Latency | P99 Latency | Avg Latency |
|
||||
|:------------|:---------------------|--------------------:|--------------------:|--------------:|--------------:|--------------:|--------------:|
|
||||
| FP16 | TensorRT | 1 | 255.6 | 4.032 | 4.061 | 4.141 | 3.909 |
|
||||
| FP16 | TensorRT | 2 | 419.2 | 4.892 | 4.94 | 5.133 | 4.766 |
|
||||
| FP16 | TensorRT | 4 | 633.6 | 6.603 | 6.912 | 7.18 | 6.306 |
|
||||
| FP16 | TensorRT | 8 | 865.6 | 9.657 | 9.73 | 9.834 | 9.236 |
|
||||
| FP16 | TensorRT | 16 | 950.4 | 18.396 | 20.748 | 23.873 | 16.824 |
|
||||
| FP16 | TensorRT | 32 | 854.4 | 37.965 | 38.599 | 40.34 | 37.432 |
|
||||
| FP16 | TensorRT | 64 | 825.6 | 80.118 | 80.758 | 87.374 | 77.596 |
|
||||
| FP16 | TensorRT | 128 | 704 | 189.198 | 189.87 | 191.259 | 183.205 |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
#### Offline: NVIDIA T4, TF-TRT with FP16
|
||||
|
||||
Our results were obtained using the following configuration:
|
||||
* **GPU:** NVIDIA T4
|
||||
* **Backend:** TensorFlow
|
||||
* **Model binding:** TF-TRT
|
||||
* **Precision:** FP16
|
||||
* **Model format:** TensorFlow SavedModel
|
||||
|
||||
|![](plots/graph_performance_offline_15l.svg)|![](plots/graph_performance_offline_15r.svg)|
|
||||
|-----|-----|
|
||||
|
||||
<details>
|
||||
|
||||
<summary>
|
||||
Full tabular data
|
||||
</summary>
|
||||
|
||||
| Precision | Backend Accelerator | Client Batch Size | Inferences/second | P90 Latency | P95 Latency | P99 Latency | Avg Latency |
|
||||
|:------------|:---------------------|--------------------:|--------------------:|--------------:|--------------:|--------------:|--------------:|
|
||||
| FP16 | TensorRT | 1 | 211.7 | 4.89 | 4.926 | 4.965 | 4.717 |
|
||||
| FP16 | TensorRT | 2 | 327.8 | 6.258 | 6.309 | 6.436 | 6.094 |
|
||||
| FP16 | TensorRT | 4 | 468.4 | 8.996 | 9.085 | 9.239 | 8.531 |
|
||||
| FP16 | TensorRT | 8 | 544.8 | 15.654 | 15.978 | 16.324 | 14.673 |
|
||||
| FP16 | TensorRT | 16 | 544 | 30.626 | 30.788 | 31.311 | 29.477 |
|
||||
| FP16 | TensorRT | 32 | 524.8 | 64.527 | 65.35 | 66.13 | 60.943 |
|
||||
| FP16 | TensorRT | 64 | 556.8 | 115.455 | 115.717 | 116.02 | 113.802 |
|
||||
| FP16 | TensorRT | 128 | 537.6 | 242.501 | 244.599 | 246.16 | 238.384 |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Online scenario
|
||||
|
||||
This table lists the common variable parameters for all performance measurements:
|
||||
| Parameter Name | Parameter Value |
|
||||
|:-----------------------------|:------------------|
|
||||
| Max Batch Size | 128.0 |
|
||||
| Number of model instances | 2.0 |
|
||||
| Triton Max Queue Delay | 1.0 |
|
||||
| Triton Preferred Batch Sizes | 64 128 |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#### Online: NVIDIA A40, TF-TRT with FP16
|
||||
|
||||
Our results were obtained using the following configuration:
|
||||
* **GPU:** NVIDIA A40
|
||||
* **Backend:** TensorFlow
|
||||
* **Model binding:** TF-TRT
|
||||
* **Precision:** FP16
|
||||
* **Model format:** TensorFlow SavedModel
|
||||
|
||||
![](plots/graph_performance_online_6.svg)
|
||||
|
||||
<details>
|
||||
|
||||
<summary>
|
||||
Full tabular data
|
||||
</summary>
|
||||
|
||||
| Concurrent client requests | Inferences/second | Client Send | Network+server Send/recv | Server Queue | Server Compute Input | Server Compute Infer | Server Compute Output | Client Recv | P50 Latency | P90 Latency | P95 Latency | P99 Latency | Avg Latency |
|
||||
|-----------------------------:|--------------------:|--------------:|---------------------------:|---------------:|-----------------------:|-----------------------:|------------------------:|--------------:|--------------:|--------------:|--------------:|--------------:|--------------:|
|
||||
| 16 | 1421.3 | 0.109 | 4.875 | 1.126 | 0.895 | 4.188 | 0.053 | 0 | 11.046 | 17.34 | 17.851 | 19.013 | 11.246 |
|
||||
| 32 | 1920 | 0.118 | 8.402 | 1.47 | 1.323 | 5.277 | 0.09 | 0 | 16.328 | 28.052 | 29.871 | 31.932 | 16.68 |
|
||||
| 48 | 2270.4 | 0.12 | 11.505 | 1.856 | 1.582 | 5.953 | 0.113 | 0 | 22.172 | 31.87 | 35.395 | 41.256 | 21.129 |
|
||||
| 64 | 2401.9 | 0.12 | 14.443 | 2.299 | 2.358 | 7.285 | 0.149 | 0 | 26.69 | 37.388 | 40.73 | 47.503 | 26.654 |
|
||||
| 80 | 2823 | 0.126 | 14.917 | 2.71 | 2.406 | 7.977 | 0.174 | 0 | 29.113 | 39.932 | 43.789 | 51.24 | 28.31 |
|
||||
| 96 | 2903.8 | 0.133 | 18.824 | 2.929 | 2.595 | 8.364 | 0.18 | 0 | 33.951 | 46.785 | 51.878 | 60.37 | 33.025 |
|
||||
| 112 | 3096.6 | 0.135 | 20.018 | 3.362 | 2.97 | 9.434 | 0.209 | 0 | 37.927 | 50.587 | 55.169 | 63.141 | 36.128 |
|
||||
| 128 | 3252 | 0.138 | 21.092 | 3.912 | 3.445 | 10.505 | 0.245 | 0 | 41.241 | 53.912 | 58.961 | 68.864 | 39.337 |
|
||||
| 144 | 3352.4 | 0.137 | 21.407 | 4.527 | 4.237 | 12.363 | 0.293 | 0 | 44.211 | 59.876 | 65.971 | 79.335 | 42.964 |
|
||||
| 160 | 3387.4 | 0.137 | 22.947 | 5.179 | 4.847 | 13.805 | 0.326 | 0 | 48.423 | 65.393 | 69.568 | 81.288 | 47.241 |
|
||||
| 176 | 3409.1 | 0.142 | 24.989 | 5.623 | 5.539 | 14.956 | 0.357 | 0 | 52.714 | 71.332 | 78.478 | 99.086 | 51.606 |
|
||||
| 192 | 3481.8 | 0.143 | 25.661 | 6.079 | 6.666 | 16.442 | 0.372 | 0 | 55.383 | 79.276 | 95.479 | 122.295 | 55.363 |
|
||||
| 208 | 3523.8 | 0.147 | 27.042 | 6.376 | 7.526 | 17.413 | 0.4 | 0 | 58.823 | 86.375 | 104.134 | 123.278 | 58.904 |
|
||||
| 224 | 3587.2 | 0.148 | 29.648 | 6.776 | 7.659 | 17.85 | 0.411 | 0 | 61.973 | 91.804 | 107.987 | 130.413 | 62.492 |
|
||||
| 240 | 3507.4 | 0.153 | 31.079 | 7.987 | 9.246 | 19.342 | 0.426 | 0 | 65.697 | 106.035 | 121.914 | 137.572 | 68.233 |
|
||||
| 256 | 3504.4 | 0.16 | 34.664 | 8.252 | 9.886 | 19.567 | 0.461 | 0 | 70.708 | 115.965 | 127.808 | 147.327 | 72.99 |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
#### Online: NVIDIA DGX A100 (1x A100 80GB), TF-TRT with FP16
|
||||
|
||||
Our results were obtained using the following configuration:
|
||||
* **GPU:** NVIDIA DGX A100 (1x A100 80GB)
|
||||
* **Backend:** TensorFlow
|
||||
* **Model binding:** TF-TRT
|
||||
* **Precision:** FP16
|
||||
* **Model format:** TensorFlow SavedModel
|
||||
|
||||
![](plots/graph_performance_online_14.svg)
|
||||
|
||||
<details>
|
||||
|
||||
<summary>
|
||||
Full tabular data
|
||||
</summary>
|
||||
|
||||
| Concurrent client requests | Inferences/second | Client Send | Network+server Send/recv | Server Queue | Server Compute Input | Server Compute Infer | Server Compute Output | Client Recv | P50 Latency | P90 Latency | P95 Latency | P99 Latency | Avg Latency |
|
||||
|-----------------------------:|--------------------:|--------------:|---------------------------:|---------------:|-----------------------:|-----------------------:|------------------------:|--------------:|--------------:|--------------:|--------------:|--------------:|--------------:|
|
||||
| 16 | 1736.5 | 0.11 | 2.754 | 1.272 | 0.954 | 4.08 | 0.036 | 0 | 9.037 | 12.856 | 13.371 | 15.174 | 9.206 |
|
||||
| 32 | 2418.9 | 0.114 | 5.15 | 1.494 | 1.361 | 5.031 | 0.072 | 0 | 13.234 | 20.638 | 21.717 | 23.352 | 13.222 |
|
||||
| 48 | 2891.3 | 0.112 | 7.389 | 1.721 | 1.586 | 5.688 | 0.096 | 0 | 17.089 | 25.946 | 27.611 | 29.784 | 16.592 |
|
||||
| 64 | 3432.6 | 0.11 | 7.866 | 2.11 | 2.126 | 6.301 | 0.131 | 0 | 19.322 | 25.971 | 28.845 | 34.024 | 18.644 |
|
||||
| 80 | 3644.6 | 0.116 | 9.665 | 2.33 | 2.493 | 7.185 | 0.146 | 0 | 22.834 | 29.061 | 32.281 | 37.224 | 21.935 |
|
||||
| 96 | 3902.2 | 0.116 | 11.138 | 2.676 | 2.828 | 7.684 | 0.166 | 0 | 25.589 | 32.572 | 35.307 | 40.123 | 24.608 |
|
||||
| 112 | 3960.6 | 0.124 | 13.321 | 2.964 | 3.209 | 8.438 | 0.186 | 0 | 29.537 | 37.388 | 40.602 | 46.193 | 28.242 |
|
||||
| 128 | 4137.7 | 0.124 | 14.325 | 3.372 | 3.646 | 9.244 | 0.219 | 0 | 31.587 | 41.968 | 44.993 | 51.38 | 30.93 |
|
||||
| 144 | 4139.6 | 0.136 | 15.919 | 3.803 | 4.451 | 10.274 | 0.233 | 0 | 35.696 | 48.301 | 51.345 | 57.414 | 34.816 |
|
||||
| 160 | 4300.5 | 0.134 | 16.453 | 4.341 | 4.934 | 10.979 | 0.274 | 0 | 38.495 | 50.566 | 53.943 | 61.406 | 37.115 |
|
||||
| 176 | 4166.6 | 0.143 | 18.436 | 4.959 | 6.081 | 12.321 | 0.309 | 0 | 43.451 | 60.739 | 69.51 | 84.959 | 42.249 |
|
||||
| 192 | 4281.3 | 0.138 | 19.585 | 5.201 | 6.571 | 13.042 | 0.313 | 0 | 46.175 | 62.718 | 69.46 | 83.032 | 44.85 |
|
||||
| 208 | 4314.8 | 0.15 | 20.046 | 5.805 | 7.752 | 14.062 | 0.335 | 0 | 47.957 | 73.848 | 84.644 | 96.408 | 48.15 |
|
||||
| 224 | 4388.2 | 0.141 | 21.393 | 6.105 | 8.236 | 14.85 | 0.343 | 0 | 50.449 | 77.534 | 88.553 | 100.727 | 51.068 |
|
||||
| 240 | 4371.8 | 0.143 | 22.342 | 6.711 | 9.423 | 15.78 | 0.377 | 0 | 53.216 | 85.983 | 97.756 | 112.48 | 54.776 |
|
||||
| 256 | 4617.3 | 0.144 | 23.392 | 6.595 | 9.466 | 15.568 | 0.367 | 0 | 54.703 | 86.054 | 93.95 | 105.917 | 55.532 |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
#### Online: NVIDIA DGX-1 (1x V100 32GB), TF-TRT with FP16
|
||||
|
||||
Our results were obtained using the following configuration:
|
||||
* **GPU:** NVIDIA DGX-1 (1x V100 32GB)
|
||||
* **Backend:** TensorFlow
|
||||
* **Model binding:** TF-TRT
|
||||
* **Precision:** FP16
|
||||
* **Model format:** TensorFlow SavedModel
|
||||
|
||||
![](plots/graph_performance_online_22.svg)
|
||||
|
||||
<details>
|
||||
|
||||
<summary>
|
||||
Full tabular data
|
||||
</summary>
|
||||
|
||||
| Concurrent client requests | Inferences/second | Client Send | Network+server Send/recv | Server Queue | Server Compute Input | Server Compute Infer | Server Compute Output | Client Recv | P50 Latency | P90 Latency | P95 Latency | P99 Latency | Avg Latency |
|
||||
|-----------------------------:|--------------------:|--------------:|---------------------------:|---------------:|-----------------------:|-----------------------:|------------------------:|--------------:|--------------:|--------------:|--------------:|--------------:|--------------:|
|
||||
| 16 | 1259.7 | 0.121 | 3.735 | 1.999 | 0.803 | 5.998 | 0.034 | 0 | 13.623 | 17.271 | 17.506 | 18.938 | 12.69 |
|
||||
| 32 | 1686.4 | 0.17 | 6.9 | 2.33 | 2.212 | 7.303 | 0.07 | 0 | 18.836 | 28.302 | 30.423 | 32.916 | 18.985 |
|
||||
| 48 | 1888.3 | 0.183 | 9.068 | 3.372 | 3.65 | 9.058 | 0.108 | 0.001 | 26.571 | 36.583 | 40.84 | 50.402 | 25.44 |
|
||||
| 64 | 2103.9 | 0.204 | 12.416 | 3.146 | 4.304 | 10.127 | 0.145 | 0.001 | 32.401 | 37.121 | 41.252 | 49.094 | 30.343 |
|
||||
| 80 | 2255.2 | 0.211 | 13.753 | 4.074 | 5.455 | 11.776 | 0.192 | 0.001 | 38.298 | 47.082 | 54.476 | 65.412 | 35.462 |
|
||||
| 96 | 2376.6 | 0.214 | 16.22 | 4.873 | 5.972 | 12.911 | 0.208 | 0.001 | 43.008 | 52.947 | 57.126 | 69.778 | 40.399 |
|
||||
| 112 | 2445.6 | 0.243 | 18.495 | 5.461 | 7.012 | 14.365 | 0.248 | 0.001 | 48.081 | 62.414 | 68.274 | 85.766 | 45.825 |
|
||||
| 128 | 2534.2 | 0.261 | 19.294 | 6.486 | 7.925 | 16.312 | 0.282 | 0.001 | 52.894 | 68.475 | 74.852 | 89.979 | 50.561 |
|
||||
| 144 | 2483.9 | 0.27 | 20.771 | 7.744 | 9.993 | 18.865 | 0.414 | 0.001 | 64.866 | 70.434 | 80.279 | 99.177 | 58.058 |
|
||||
| 160 | 2512.8 | 0.302 | 24.205 | 7.838 | 11.217 | 19.689 | 0.373 | 0.001 | 69.085 | 85.576 | 95.016 | 109.455 | 63.625 |
|
||||
| 176 | 2541 | 0.311 | 26.206 | 8.556 | 12.439 | 21.393 | 0.418 | 0.001 | 76.666 | 92.266 | 106.889 | 127.055 | 69.324 |
|
||||
| 192 | 2623.4 | 0.33 | 27.783 | 9.058 | 13.198 | 22.181 | 0.433 | 0.001 | 79.724 | 97.736 | 111.44 | 142.418 | 72.984 |
|
||||
| 208 | 2616.2 | 0.353 | 29.667 | 9.759 | 15.693 | 23.567 | 0.444 | 0.001 | 80.571 | 125.202 | 140.527 | 175.331 | 79.484 |
|
||||
| 224 | 2693.9 | 0.369 | 32.283 | 9.941 | 15.769 | 24.304 | 0.439 | 0.001 | 78.743 | 137.09 | 151.955 | 183.397 | 83.106 |
|
||||
| 240 | 2700.4 | 0.447 | 32.287 | 11.128 | 18.204 | 26.578 | 0.456 | 0.001 | 82.561 | 155.011 | 177.925 | 191.51 | 89.101 |
|
||||
| 256 | 2743.8 | 0.481 | 34.688 | 11.834 | 19.087 | 26.597 | 0.459 | 0.001 | 89.387 | 153.866 | 177.805 | 204.319 | 93.147 |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
#### Online: NVIDIA T4, TF-TRT with FP16
|
||||
|
||||
Our results were obtained using the following configuration:
|
||||
* **GPU:** NVIDIA T4
|
||||
* **Backend:** TensorFlow
|
||||
* **Model binding:** TF-TRT
|
||||
* **Precision:** FP16
|
||||
* **Model format:** TensorFlow SavedModel
|
||||
|
||||
![](plots/graph_performance_online_30.svg)
|
||||
|
||||
<details>
|
||||
|
||||
<summary>
|
||||
Full tabular data
|
||||
</summary>
|
||||
|
||||
| Concurrent client requests | Inferences/second | Client Send | Network+server Send/recv | Server Queue | Server Compute Input | Server Compute Infer | Server Compute Output | Client Recv | P50 Latency | P90 Latency | P95 Latency | P99 Latency | Avg Latency |
|
||||
|-----------------------------:|--------------------:|--------------:|---------------------------:|---------------:|-----------------------:|-----------------------:|------------------------:|--------------:|--------------:|--------------:|--------------:|--------------:|--------------:|
|
||||
| 16 | 731.4 | 0.271 | 6.9 | 3.745 | 2.073 | 8.802 | 0.081 | 0.001 | 25.064 | 28.863 | 29.7 | 32.01 | 21.873 |
|
||||
| 32 | 935 | 0.273 | 12.023 | 3.48 | 4.375 | 13.885 | 0.141 | 0.001 | 31.339 | 50.564 | 52.684 | 55.823 | 34.178 |
|
||||
| 48 | 1253 | 0.298 | 12.331 | 5.313 | 4.623 | 15.634 | 0.178 | 0.001 | 38.099 | 60.665 | 64.537 | 72.38 | 38.378 |
|
||||
| 64 | 1368.3 | 0.303 | 15.3 | 6.926 | 4.9 | 19.118 | 0.2 | 0.001 | 48.758 | 66.391 | 73.271 | 81.537 | 46.748 |
|
||||
| 80 | 1410.7 | 0.296 | 15.525 | 11.06 | 6.934 | 22.476 | 0.286 | 0.001 | 60.346 | 65.664 | 76.055 | 84.643 | 56.578 |
|
||||
| 96 | 1473.1 | 0.309 | 18.846 | 11.746 | 7.825 | 26.165 | 0.319 | 0.001 | 69.785 | 77.337 | 91.586 | 100.918 | 65.211 |
|
||||
| 112 | 1475.5 | 0.316 | 23.275 | 12.412 | 8.954 | 30.724 | 0.338 | 0.001 | 79.904 | 106.324 | 111.382 | 126.559 | 76.02 |
|
||||
| 128 | 1535.9 | 0.328 | 23.486 | 14.64 | 10.057 | 34.534 | 0.352 | 0.001 | 89.451 | 110.789 | 121.814 | 140.139 | 83.398 |
|
||||
| 144 | 1512.3 | 0.336 | 25.79 | 18.7 | 12.205 | 37.909 | 0.435 | 0.001 | 103.388 | 108.917 | 114.44 | 136.469 | 95.376 |
|
||||
| 160 | 1533.6 | 0.406 | 29.825 | 17.67 | 13.751 | 42.259 | 0.44 | 0.001 | 111.899 | 140.67 | 154.76 | 191.391 | 104.352 |
|
||||
| 176 | 1515.1 | 0.438 | 34.286 | 17.867 | 16.42 | 46.792 | 0.461 | 0.001 | 120.503 | 187.317 | 205.71 | 223.391 | 116.265 |
|
||||
| 192 | 1532.2 | 0.476 | 34.796 | 18.86 | 19.071 | 51.446 | 0.483 | 0.001 | 124.044 | 211.466 | 226.921 | 237.664 | 125.133 |
|
||||
| 208 | 1616.7 | 0.697 | 32.363 | 21.465 | 18.315 | 55.539 | 0.516 | 0.001 | 127.891 | 200.478 | 221.404 | 250.348 | 128.896 |
|
||||
| 224 | 1541.5 | 0.702 | 35.932 | 22.786 | 22.138 | 62.657 | 0.527 | 0.001 | 141.32 | 248.069 | 263.661 | 276.579 | 144.743 |
|
||||
| 240 | 1631.7 | 0.79 | 37.581 | 22.791 | 21.651 | 64.278 | 0.549 | 0.001 | 141.393 | 250.354 | 272.17 | 289.926 | 147.641 |
|
||||
| 256 | 1607.4 | 0.801 | 39.342 | 29.09 | 23.416 | 66.866 | 0.593 | 0.001 | 157.87 | 262.818 | 280.921 | 310.504 | 160.109 |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Release Notes
|
||||
We’re constantly refining and improving our performance on AI
|
||||
and HPC workloads even on the same hardware with frequent updates
|
||||
to our software stack. For our latest performance data please refer
|
||||
to these pages for
|
||||
[AI](https://developer.nvidia.com/deep-learning-performance-training-inference)
|
||||
and [HPC](https://developer.nvidia.com/hpc-application-performance) benchmarks.
|
||||
|
||||
### Changelog
|
||||
|
||||
July 2020
|
||||
- Initial release
|
||||
|
||||
April 2021
|
||||
- NVIDIA A100 results added
|
||||
|
||||
### Known issues
|
||||
|
||||
There are no known issues with this model.
|
||||
|
||||
|
134
TensorFlow/Classification/ConvNets/triton/calculate_metrics.py
Executable file
|
@ -0,0 +1,134 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
r"""
|
||||
Using `calculate_metrics.py` script, you can obtain model accuracy/error metrics using defined `MetricsCalculator` class.
|
||||
See [documentation](https://gitlab-master.nvidia.com/dl/JoC/bermuda-api/-/blob/develop/bermuda_api_toolset/docs/metrics.md) on preparation of this class.
|
||||
|
||||
Data provided to `MetricsCalculator` are obtained from [npz dump files](https://gitlab-master.nvidia.com/dl/JoC/bermuda-api/-/blob/develop/bermuda_api_toolset/docs/dump_files.md)
|
||||
stored in directory pointed by `--dump-dir` argument.
|
||||
Above files are prepared by `run_inference_on_fw.py` and `run_inference_on_triton.py` scripts.
|
||||
|
||||
Output data is stored in csv file pointed by `--csv` argument.
|
||||
|
||||
Example call:
|
||||
|
||||
```shell script
|
||||
python ./triton/calculate_metrics.py \
|
||||
--dump-dir /results/dump_triton \
|
||||
--csv /results/accuracy_results.csv \
|
||||
--metrics metrics.py \
|
||||
--metric-class-param1 value
|
||||
```
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import logging
|
||||
import string
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
# method from PEP-366 to support relative import in executed modules
|
||||
|
||||
if __package__ is None:
|
||||
__package__ = Path(__file__).parent.name
|
||||
|
||||
from .deployment_toolkit.args import ArgParserGenerator
|
||||
from .deployment_toolkit.core import BaseMetricsCalculator, load_from_file
|
||||
from .deployment_toolkit.dump import pad_except_batch_axis
|
||||
|
||||
LOGGER = logging.getLogger("calculate_metrics")
|
||||
TOTAL_COLUMN_NAME = "_total_"
|
||||
|
||||
|
||||
def get_data(dump_dir, prefix):
    """Load and concatenate dump files for a given prefix (e.g. inputs, outputs, labels, ids).

    All ``{prefix}*.npz`` files found directly in ``dump_dir`` are read in sorted
    order. For every array name, the per-file arrays are passed through
    ``pad_except_batch_axis`` with the element-wise maximum shape seen across
    files and then concatenated along the first axis.

    Args:
        dump_dir: directory containing the npz dump files (str or Path).
        prefix: file name prefix selecting which dump files to load.

    Returns:
        Dict mapping array name to the concatenated ``np.ndarray``, or ``None``
        when no file matches the prefix.
    """
    dump_dir = Path(dump_dir)
    npz_files = sorted(dump_dir.glob(f"{prefix}*.npz"))
    if not npz_files:
        return None

    # Read every file exactly once and close its handle right away; the arrays
    # are materialised in memory while the archive is still open.
    names = None
    batches = []
    for npz_file in npz_files:
        with np.load(npz_file.as_posix()) as npz:
            if names is None:
                # assume that all npz files with given prefix contain same set of names
                names = list(npz.keys())
            batches.append({name: npz[name] for name in names})

    # calculate target shape: element-wise maximum over the shapes of all files
    target_shape = {name: tuple(np.max([batch[name].shape for batch in batches], axis=0)) for name in names}

    # pad and concatenate data
    return {
        name: np.concatenate([pad_except_batch_axis(batch[name], target_shape[name]) for batch in batches])
        for name in names
    }
|
||||
|
||||
|
||||
def main():
    """Compute metrics from dumped inference results and write them to a one-row CSV file.

    The command line is parsed in two passes: the first pass only needs
    ``--metrics`` so that the ``MetricsCalculator`` class can be loaded from the
    given module; the parser is then extended through ``ArgParserGenerator``
    and the full command line is parsed again.

    Raises:
        ValueError: if no ``ids*.npz`` dump files exist in ``--dump-dir`` or if
            any metric name contains a whitespace character.
    """
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description="Calculate metrics from dumped inference results", allow_abbrev=False)
    parser.add_argument("--metrics", help="Path to python module containing metrics calculator", required=True)
    parser.add_argument("--csv", help="Path to csv file", required=True)
    parser.add_argument("--dump-dir", help="Path to directory with dumped outputs (and labels)", required=True)

    # First pass: calculator-specific options are not registered yet, so unknown
    # arguments have to be tolerated here.
    args, *_ = parser.parse_known_args()

    MetricsCalculator = load_from_file(args.metrics, "metrics", "MetricsCalculator")
    ArgParserGenerator(MetricsCalculator).update_argparser(parser)

    # Second pass: strict parsing with the complete set of options.
    args = parser.parse_args()

    LOGGER.info("args:")
    for key, value in vars(args).items():
        LOGGER.info(f" {key} = {value}")

    # The class loaded above is reused instead of loading the module a second time.
    metrics_calculator: BaseMetricsCalculator = ArgParserGenerator(MetricsCalculator).from_args(args)

    ids_data = get_data(args.dump_dir, "ids")
    if ids_data is None:
        # get_data returns None when nothing matches; fail with a readable message
        raise ValueError(f"No ids*.npz dump files found in {args.dump_dir}")
    ids = ids_data["ids"]
    x = get_data(args.dump_dir, "inputs")
    y_true = get_data(args.dump_dir, "labels")
    y_pred = get_data(args.dump_dir, "outputs")

    # Only names present in both labels and outputs can be compared; either dict may be None.
    common_keys = set(y_true or ()) & set(y_pred or ())
    for key in common_keys:
        if y_true[key].shape != y_pred[key].shape:
            LOGGER.warning(
                "Model predictions and labels shall have equal shapes. "
                f"y_pred[{key}].shape={y_pred[key].shape} != "
                f"y_true[{key}].shape={y_true[key].shape}"
            )

    metrics = metrics_calculator.calc(ids=ids, x=x, y_pred=y_pred, y_real=y_true)
    metrics = {TOTAL_COLUMN_NAME: len(ids), **metrics}

    metric_names_with_space = [name for name in metrics if any(c in string.whitespace for c in name)]
    if metric_names_with_space:
        raise ValueError(f"Metric names shall have no spaces; Incorrect names: {', '.join(metric_names_with_space)}")

    csv_path = Path(args.csv)
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" hands line-ending control to the csv module (avoids blank rows on Windows)
    with csv_path.open("w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=list(metrics.keys()))
        writer.writeheader()
        writer.writerow(metrics)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
193
TensorFlow/Classification/ConvNets/triton/config_model_on_trion.py
Executable file
|
@ -0,0 +1,193 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
r"""
|
||||
To deploy model in Triton, you can use `deploy_model.py` script.
|
||||
This will prepare layout of Model Repository, including Model Configuration.
|
||||
|
||||
```shell script
|
||||
python ./triton/deploy_model.py \
|
||||
--model-repository /model_repository \
|
||||
--model-path /models/exported/model.onnx \
|
||||
--model-format onnx \
|
||||
--model-name ResNet50 \
|
||||
--model-version 1 \
|
||||
--max-batch-size 32 \
|
||||
--precision fp16 \
|
||||
--backend-accelerator trt \
|
||||
--load-model explicit \
|
||||
--timeout 120 \
|
||||
--verbose
|
||||
```
|
||||
|
||||
If Triton server to which we prepare model repository is running with **explicit model control mode**,
|
||||
use `--load-model` argument to send a load_model request to Triton Inference Server.
|
||||
If server is listening on non-default address or port use `--server-url` argument to point server control endpoint.
|
||||
If it is required to use HTTP protocol to communicate with Triton server use `--http` argument.
|
||||
|
||||
To improve inference throughput you can use
|
||||
[dynamic batching](https://github.com/triton-inference-server/server/blob/master/docs/model_configuration.md#dynamic-batcher)
|
||||
for your model by providing `--preferred-batch-sizes` and `--max-queue-delay-us` parameters.
|
||||
|
||||
By default Triton will [automatically obtain inputs and outputs definitions](https://github.com/triton-inference-server/server/blob/master/docs/model_configuration.md#auto-generated-model-configuration),
|
||||
but for TorchScript models script uses file with I/O specs. This file is automatically generated
|
||||
when the model is converted to ScriptModule (either traced or scripted).
|
||||
If there is a need to pass different than default path to I/O spec file use `--io-spec` CLI argument.
|
||||
|
||||
I/O spec file is yaml file with below structure:
|
||||
|
||||
```yaml
|
||||
- inputs:
|
||||
- name: input
|
||||
dtype: float32 # np.dtype name
|
||||
shape: [None, 224, 224, 3]
|
||||
- outputs:
|
||||
- name: probabilities
|
||||
dtype: float32
|
||||
shape: [None, 1001]
|
||||
- name: classes
|
||||
dtype: int32
|
||||
shape: [None, 1]
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
from service_maker import Accelerator, Format, Precision
|
||||
from service_maker.args import str2bool
|
||||
from service_maker.log import dump_arguments, set_logger
|
||||
from service_maker.triton import ModelConfig, TritonClient, TritonModelStore
|
||||
|
||||
LOGGER = logging.getLogger("deploy_model")
|
||||
|
||||
|
||||
def _available_enum_values(my_enum):
    """Return a list with the ``value`` attribute of every member yielded by iterating ``my_enum``."""
    values = []
    for member in my_enum:
        values.append(member.value)
    return values
|
||||
|
||||
|
||||
def main():
    """Create a Triton model configuration from CLI arguments and deploy the model.

    The model is placed in the model repository through ``TritonModelStore``.
    Unless ``--load-model none`` is given, the function then connects to the
    Triton server, requests a model load when the mode is ``explicit``, and
    waits for the model up to ``--timeout`` seconds.
    """
    parser = argparse.ArgumentParser(
        description="Create Triton model repository and model configuration",
        allow_abbrev=False,
    )

    # model location and identity
    parser.add_argument("--model-repository", help="Path to Triton model repository.", required=True)
    parser.add_argument("--model-path", help="Path to model to deploy", required=True)
    # TODO: automation
    parser.add_argument(
        "--model-format",
        help="Format of model to deploy",
        choices=_available_enum_values(Format),
        required=True,
    )
    parser.add_argument("--model-name", help="Model name", required=True)
    parser.add_argument("--model-version", help="Version of model (default 1)", default="1")
    parser.add_argument(
        "--max-batch-size",
        help="Maximum batch size allowed for inference. "
        "A max_batch_size value of 0 indicates that batching is not allowed for the model",
        type=int,
        default=32,
    )
    # TODO: automation
    parser.add_argument(
        "--precision",
        help="Model precision (parameter used only by Tensorflow backend with TensorRT optimization)",
        type=str,
        choices=_available_enum_values(Precision),
        default=Precision.FP16.value,
    )

    # Triton Inference Server endpoint
    parser.add_argument(
        "--server-url",
        help="Inference server URL in format protocol://host[:port] (default grpc://localhost:8001)",
        type=str,
        default="grpc://localhost:8001",
    )
    parser.add_argument(
        "--load-model",
        help="Loading model while Triton Server is in given model control mode",
        choices=["none", "poll", "explicit"],
    )
    parser.add_argument(
        "--timeout",
        help="Timeout in seconds to wait till model load (default=120)",
        type=int,
        default=120,
    )

    # optimization related
    parser.add_argument(
        "--backend-accelerator",
        help="Select Backend Accelerator used to serve model",
        type=str,
        choices=_available_enum_values(Accelerator),
        default=Accelerator.TRT.value,
    )
    parser.add_argument(
        "--number-of-model-instances",
        help="Number of model instances per GPU",
        type=int,
        default=1,
    )
    parser.add_argument(
        "--preferred-batch-sizes",
        help="Batch sizes that the dynamic batcher should attempt to create. "
        "In case --max-queue-delay-us is set and this parameter is not, default value will be --max-batch-size",
        type=int,
        nargs="*",
    )
    parser.add_argument(
        "--max-queue-delay-us",
        help="Max delay time which dynamic batcher shall wait to form a batch (default 0)",
        type=int,
        default=0,
    )
    parser.add_argument(
        "--capture-cuda-graph",
        help="Use cuda capture graph (used only by TensorRT platform)",
        type=int,
        default=0,
    )

    # NOTE(review): with type=str2bool and no nargs/action this flag expects an
    # explicit value on the command line - confirm against str2bool.
    parser.add_argument("-v", "--verbose", help="Provide verbose logs", type=str2bool, default=False)
    args = parser.parse_args()

    set_logger(verbose=args.verbose)
    dump_arguments(args)

    model_config = ModelConfig.create(
        model_path=args.model_path,
        # model definition
        model_name=args.model_name,
        model_version=args.model_version,
        model_format=args.model_format,
        precision=args.precision,
        max_batch_size=args.max_batch_size,
        # optimization
        accelerator=args.backend_accelerator,
        gpu_engine_count=args.number_of_model_instances,
        # argparse yields None when the option is absent; pass an empty list instead
        preferred_batch_sizes=args.preferred_batch_sizes or [],
        max_queue_delay_us=args.max_queue_delay_us,
        capture_cuda_graph=args.capture_cuda_graph,
    )

    model_store = TritonModelStore(args.model_repository)
    model_store.deploy_model(model_config=model_config, model_path=args.model_path)

    # NOTE(review): --load-model has no default, so omitting it yields None, which
    # is not equal to "none" and therefore still waits for the model below -
    # confirm this is intended.
    if args.load_model == "none":
        return

    triton_client = TritonClient(server_url=args.server_url, verbose=args.verbose)
    if args.load_model == "explicit":
        # only explicit model control mode needs a load request
        triton_client.load_model(model_name=args.model_name)
    triton_client.wait_for_model(
        model_name=args.model_name,
        model_version=args.model_version,
        timeout_s=args.timeout,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
166
TensorFlow/Classification/ConvNets/triton/convert_model.py
Executable file
|
@ -0,0 +1,166 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
r"""
|
||||
`convert_model.py` script allows converting between model formats with additional model optimizations
|
||||
for faster inference.
|
||||
It converts model from results of [`get_model`](https://gitlab-master.nvidia.com/dl/JoC/bermuda-api/-/blob/develop/bermuda_api_toolset/docs/model.md) function.
|
||||
|
||||
Currently supported input and output formats are:
|
||||
|
||||
- inputs
|
||||
- `tf-estimator` - `get_model` function returning Tensorflow Estimator
|
||||
- `tf-keras` - `get_model` function returning Tensorflow Keras Model
|
||||
- `tf-savedmodel` - Tensorflow SavedModel binary
|
||||
- `pyt` - `get_model` function returning PyTorch Module
|
||||
- output
|
||||
- `tf-savedmodel` - Tensorflow saved model
|
||||
- `tf-trt` - TF-TRT saved model
|
||||
- `ts-trace` - PyTorch traced ScriptModule
|
||||
- `ts-script` - PyTorch scripted ScriptModule
|
||||
- `onnx` - ONNX
|
||||
- `trt` - TensorRT plan file
|
||||
|
||||
For tf-keras input you can use:
|
||||
- --large-model flag - helps loading model which exceeds maximum protobuf size of 2GB
|
||||
- --tf-allow-growth flag - control limiting GPU memory growth feature
|
||||
(https://www.tensorflow.org/guide/gpu#limiting_gpu_memory_growth). By default it is disabled.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
|
||||
os.environ["TF_ENABLE_DEPRECATION_WARNINGS"] = "1"
|
||||
|
||||
# method from PEP-366 to support relative import in executed modules
|
||||
if __name__ == "__main__" and __package__ is None:
|
||||
__package__ = Path(__file__).parent.name
|
||||
|
||||
from .deployment_toolkit.args import ArgParserGenerator
|
||||
from .deployment_toolkit.core import (
|
||||
DATALOADER_FN_NAME,
|
||||
BaseConverter,
|
||||
BaseLoader,
|
||||
BaseSaver,
|
||||
Format,
|
||||
Precision,
|
||||
load_from_file,
|
||||
)
|
||||
from .deployment_toolkit.extensions import converters, loaders, savers
|
||||
|
||||
LOGGER = logging.getLogger("convert_model")
|
||||
|
||||
INPUT_MODEL_TYPES = [Format.TF_ESTIMATOR, Format.TF_KERAS, Format.TF_SAVEDMODEL, Format.PYT]
|
||||
OUTPUT_MODEL_TYPES = [Format.TF_SAVEDMODEL, Format.TF_TRT, Format.ONNX, Format.TRT, Format.TS_TRACE, Format.TS_SCRIPT]
|
||||
|
||||
|
||||
def _get_args():
    """Parse command line arguments in two passes.

    The first pass reads only the generic options declared here. The selected
    input type, output type and (optional) dataloader are then used to extend
    the parser with the arguments of the matching loader, converter, saver and
    dataloader function, after which the command line is parsed again.

    Returns:
        argparse.Namespace with both the generic and the extension arguments.
    """
    parser = argparse.ArgumentParser(description="Script for conversion between model formats.", allow_abbrev=False)
    parser.add_argument("--input-path", help="Path to input model file (python module or binary file)", required=True)
    parser.add_argument(
        "--input-type", help="Input model type", choices=[f.value for f in INPUT_MODEL_TYPES], required=True
    )
    parser.add_argument("--output-path", help="Path to output model file", required=True)
    parser.add_argument(
        "--output-type", help="Output model type", choices=[f.value for f in OUTPUT_MODEL_TYPES], required=True
    )
    parser.add_argument("--dataloader", help="Path to python module containing data loader")
    parser.add_argument("-v", "--verbose", help="Verbose logs", action="store_true", default=False)
    parser.add_argument(
        "--ignore-unknown-parameters",
        help="Ignore unknown parameters (argument often used in CI where set of arguments is constant)",
        action="store_true",
        default=False,
    )

    # First pass: extension-specific options are not registered yet, so they
    # end up in the (ignored) list of unknown arguments.
    args, _ = parser.parse_known_args()

    Loader: BaseLoader = loaders.get(args.input_type)
    ArgParserGenerator(Loader, module_path=args.input_path).update_argparser(parser)

    # A converter is looked up under the "<input type>--<output type>" key;
    # it may be missing, in which case no converter arguments are added.
    converter_name = f"{args.input_type}--{args.output_type}"
    Converter: BaseConverter = converters.get(converter_name)
    if Converter is not None:
        ArgParserGenerator(Converter).update_argparser(parser)

    Saver: BaseSaver = savers.get(args.output_type)
    ArgParserGenerator(Saver).update_argparser(parser)

    if args.dataloader is not None:
        get_dataloader_fn = load_from_file(args.dataloader, label="dataloader", target=DATALOADER_FN_NAME)
        ArgParserGenerator(get_dataloader_fn).update_argparser(parser)

    # Second pass with the fully populated parser.
    if args.ignore_unknown_parameters:
        args, unknown_args = parser.parse_known_args()
        # Warn only when something was actually ignored.
        if unknown_args:
            LOGGER.warning(f"Got additional args {unknown_args}")
    else:
        args = parser.parse_args()
    return args
|
||||
|
||||
|
||||
def main():
    """Load a model, optionally convert it, and save it in the requested format.

    Returns:
        0 after the model has been saved.
    """
    args = _get_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )

    LOGGER.info("args:")
    for arg_name, arg_value in vars(args).items():
        LOGGER.info(f" {arg_name} = {arg_value}")

    requested_model_precision = Precision(args.precision)

    # When a converter exists for this input/output pair, the model is loaded
    # with the precision the converter asks for (TensorRT converters require
    # fp32 sources for every requested precision); the user's value is
    # restored further down.
    Converter: BaseConverter = converters.get(f"{args.input_type}--{args.output_type}")
    if Converter:
        source_precision = Converter.required_source_model_precision(requested_model_precision)
        args.precision = source_precision.value

    Loader: BaseLoader = loaders.get(args.input_type)
    model = ArgParserGenerator(Loader, module_path=args.input_path).from_args(args).load(args.input_path)

    LOGGER.info("inputs: %s", model.inputs)
    LOGGER.info("outputs: %s", model.outputs)

    # The dataloader is created while args.precision still holds the source
    # model precision, so that both match.
    dataloader_fn = None
    if Converter and args.dataloader is not None:
        get_dataloader_fn = load_from_file(args.dataloader, label="dataloader", target=DATALOADER_FN_NAME)
        dataloader_fn = ArgParserGenerator(get_dataloader_fn).from_args(args)

    # From here on the precision requested by the user is used again.
    args.precision = requested_model_precision.value

    if Converter:
        converter = ArgParserGenerator(Converter).from_args(args)
        model = converter.convert(model, dataloader_fn=dataloader_fn)

    Saver: BaseSaver = savers.get(args.output_type)
    ArgParserGenerator(Saver).from_args(args).save(model, args.output_path)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
45
TensorFlow/Classification/ConvNets/triton/dataloader.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from rn50_model import HEIGHT, WIDTH
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_dataloader_fn(
    *, data_dir: str, batch_size: int = 1, width: int = WIDTH, height: int = HEIGHT, images_num: int = None
):
    """Create a generator function yielding batches of images found under ``data_dir``.

    Files are discovered recursively, filtered by extension (.gif, .png,
    .jpeg, .jpg, case-insensitive) and processed in sorted path order. The
    name of each image's parent directory is parsed as its integer class.

    Args:
        data_dir: root directory searched for images.
        batch_size: number of images per yielded batch.
        width: width the images are resized to.
        height: height the images are resized to.
        images_num: when not None, only the first ``images_num`` images are used.

    Returns:
        A zero-argument function; calling it returns a generator of
        ``(ids, x, y_real)`` where ``ids`` is a list of image paths, ``x`` is
        ``{"input": float32 array}`` and ``y_real`` is ``{"classes": array}``.
        Images left over after the last full batch are not yielded.
    """
    image_extensions = [".gif", ".png", ".jpeg", ".jpg"]

    image_paths = sorted([p for p in Path(data_dir).rglob("*") if p.suffix.lower() in image_extensions])
    if images_num is not None:
        image_paths = image_paths[:images_num]

    LOGGER.info(
        f"Creating PIL dataloader on data_dir={data_dir} #images={len(image_paths)} "
        f"image_size=({width}, {height}) batch_size={batch_size}"
    )

    def _dataloader_fn():
        batch = []
        for image_path in image_paths:
            # Image.open keeps the underlying file open; the context manager
            # closes it once the converted and resized copy has been produced.
            with Image.open(image_path.as_posix()) as source_img:
                img = source_img.convert('RGB').resize((width, height))
            img = np.array(img).astype(np.float32)
            true_class = np.array([int(image_path.parent.name)])
            assert tuple(img.shape) == (height, width, 3)
            img = img[np.newaxis, ...]  # add the leading batch axis
            batch.append((img, image_path.as_posix(), true_class))
            if len(batch) >= batch_size:
                ids = [path for _, path, *_ in batch]
                x = {
                    "input": np.concatenate([image for image, *_ in batch]),
                }
                y_real = {"classes": np.concatenate([class_ for *_, class_ in batch])}
                batch = []
                yield ids, x, y_real

    return _dataloader_fn
|
|
@ -0,0 +1 @@
|
|||
0.4.6-46-g5bc739c
|
|
@ -0,0 +1,110 @@
|
|||
import argparse
|
||||
import inspect
|
||||
import logging
|
||||
from typing import Callable, Dict, Optional, Union
|
||||
|
||||
from .core import GET_ARGPARSER_FN_NAME, load_from_file
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def str2bool(v):
    """Convert a command line value to bool.

    Booleans are returned unchanged. Strings are compared case-insensitively:
    "yes", "true", "t", "y", "1" give True and "no", "false", "f", "n", "0"
    give False.

    Raises:
        argparse.ArgumentTypeError: for any other string.
    """
    if isinstance(v, bool):
        return v

    lowered = v.lower()
    if lowered in ("yes", "true", "t", "y", "1"):
        return True
    if lowered in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
|
||||
|
||||
|
||||
def filter_fn_args(args: Union[dict, argparse.Namespace], fn: Callable) -> dict:
    """Return the entries of ``args`` whose keys are parameter names of ``fn``.

    Args:
        args: mapping or argparse namespace with candidate values.
        fn: callable whose signature selects the keys to keep.

    Returns:
        A new dict restricted to the parameters accepted by ``fn``.
    """
    accepted_names = inspect.signature(fn).parameters
    candidates = vars(args) if isinstance(args, argparse.Namespace) else args
    return {key: value for key, value in candidates.items() if key in accepted_names}
|
||||
|
||||
|
||||
def add_args_for_fn_signature(parser, fn) -> argparse.ArgumentParser:
    """Add one ``--<name>`` option to ``parser`` per parameter of ``fn``.

    Parameters named ``self``, ``args`` and ``kwargs`` are skipped. Underscores
    in parameter names become dashes in the option name. Parameters without a
    default become required options.

    Annotation handling:
        * ``bool`` - parsed with ``str2bool``;
        * ``Optional[X]`` / ``Union[X, None]`` - parsed as ``X``
          (``str2bool`` when ``X`` is ``bool``);
        * any other annotation - used directly as the argparse ``type``.

    Args:
        parser: argparse parser or argument group to extend; its conflict
            handler is switched to "resolve".
        fn: callable whose signature is inspected.

    Returns:
        The same ``parser`` object.

    Raises:
        RuntimeError: for a Union annotation with more than one non-None member.
    """
    parser.conflict_handler = "resolve"
    signature = inspect.signature(fn)
    for parameter in signature.parameters.values():
        if parameter.name in ["self", "args", "kwargs"]:
            continue
        argument_kwargs = {}
        annotation = parameter.annotation
        if annotation != inspect.Parameter.empty:
            if annotation == bool:
                argument_kwargs["type"] = str2bool
                argument_kwargs["choices"] = [0, 1]
            elif getattr(annotation, "__origin__", None) is Union:
                # Comparing type(annotation) with type(Union) stopped matching
                # subscripted unions in Python 3.7; __origin__ identifies them
                # across versions. NoneType is filtered by identity because
                # isinstance() rejects subscripted generics such as List[int].
                types = [type_ for type_ in annotation.__args__ if type_ is not type(None)]
                if len(types) != 1:
                    raise RuntimeError(
                        f"Could not prepare argument parser for {parameter.name}: {parameter.annotation} in {fn}"
                    )
                # bool("False") is True, so Optional[bool] must not use bool itself.
                argument_kwargs["type"] = str2bool if types[0] is bool else types[0]
            else:
                argument_kwargs["type"] = annotation

        if parameter.default != inspect.Parameter.empty:
            if annotation == bool:
                argument_kwargs["default"] = str2bool(parameter.default)
            else:
                argument_kwargs["default"] = parameter.default
        else:
            argument_kwargs["required"] = True
        name = parameter.name.replace("_", "-")
        LOGGER.debug(f"Adding argument {name} with {argument_kwargs}")
        parser.add_argument(f"--{name}", **argument_kwargs)
    return parser
|
||||
|
||||
|
||||
class ArgParserGenerator:
    """Derives argparse options from a callable's signature and instantiates it from parsed args.

    For a plain function the function itself is inspected; for anything else
    its ``__init__`` is inspected. When ``module_path`` points to a ``.py``
    file, that file may contribute additional options (see ``_update_argparser``).
    """

    def __init__(self, cls_or_fn, module_path: Optional[str] = None):
        self._cls_or_fn = cls_or_fn

        if inspect.isfunction(cls_or_fn):
            self._handle = cls_or_fn
        else:
            self._handle = getattr(cls_or_fn, "__init__")

        # Only python modules are consulted for extra arguments.
        input_is_python_file = module_path and module_path.endswith(".py")
        self._input_path = module_path if input_is_python_file else None
        self._required_fn_name_for_signature_parsing = getattr(
            cls_or_fn, "required_fn_name_for_signature_parsing", None
        )

    def update_argparser(self, parser):
        """Add an argument group, named after the inspected callable, to ``parser``."""
        group_parser = parser.add_argument_group(self._handle.__name__)
        add_args_for_fn_signature(group_parser, fn=self._handle)
        self._update_argparser(group_parser)

    def get_args(self, args: argparse.Namespace):
        """Select from ``args`` the values relevant for the wrapped callable.

        Returns a dict with the values matching the callable's signature plus
        the values of every option contributed by the input python module.
        """
        selected = dict(filter_fn_args(args, fn=self._handle))

        # A throwaway parser is used only to discover the names of the
        # module-provided options.
        probe_parser = argparse.ArgumentParser(allow_abbrev=False)
        self._update_argparser(probe_parser)
        for action in probe_parser._actions:
            if isinstance(action, argparse._HelpAction):
                continue
            dest = action.dest.replace("-", "_")
            selected[dest] = getattr(args, dest)
        return selected

    def from_args(self, args: Union[argparse.Namespace, Dict]):
        """Call the wrapped class or function with the arguments selected from ``args``."""
        call_kwargs = self.get_args(args)
        LOGGER.info(f"Initializing {self._cls_or_fn.__name__}({call_kwargs})")
        return self._cls_or_fn(**call_kwargs)

    def _update_argparser(self, parser):
        """Extend ``parser`` with options defined by the input python module, if any.

        The module's argparser-update function is preferred; when the module
        does not provide one and the wrapped object names a function for
        signature parsing, that function's signature is used instead.
        """
        if not self._input_path:
            return

        label = "argparser_update"
        update_argparser_handle = load_from_file(self._input_path, label=label, target=GET_ARGPARSER_FN_NAME)
        if update_argparser_handle:
            update_argparser_handle(parser)
            return

        if not self._required_fn_name_for_signature_parsing:
            return
        fn_handle = load_from_file(
            self._input_path, label=label, target=self._required_fn_name_for_signature_parsing
        )
        if fn_handle:
            add_args_for_fn_signature(parser, fn_handle)
|
|
@ -0,0 +1,223 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
# pytype: disable=import-error
|
||||
import onnx
|
||||
import onnx.optimizer
|
||||
import onnx.shape_inference
|
||||
import onnxruntime
|
||||
from google.protobuf import text_format
|
||||
from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE
|
||||
|
||||
# pytype: enable=import-error
|
||||
|
||||
from ..core import BaseLoader, BaseRunner, BaseRunnerSession, BaseSaver, Format, Model, Precision, TensorSpec
|
||||
from ..extensions import loaders, runners, savers
|
||||
from .utils import infer_precision
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _value_info2tensor_spec(value_info: onnx.ValueInfoProto):
    """Build a TensorSpec (name, dtype, shape) from an ONNX ValueInfoProto.

    The dtype is the lower-cased ONNX element type name, with "float" and
    "double" mapped to "float32" and "float64". Dimensions without a concrete
    integer value are reported as None.
    """
    onnx_data_type_map = {"float": "float32", "double": "float64"}

    elem_type_name = onnx.TensorProto.DataType.Name(value_info.type.tensor_type.elem_type).lower()
    dtype = onnx_data_type_map.get(elem_type_name, elem_type_name)

    def _get_dim(dim):
        which = dim.WhichOneof("value")
        if which is None:
            # Neither member of the oneof is set: report the dimension as
            # unknown instead of leaking the protobuf message into the shape.
            return None
        value = getattr(dim, which)
        # A textual value is a symbolic dimension name, i.e. not a fixed size.
        return None if isinstance(value, (str, bytes)) else value

    shape = value_info.type.tensor_type.shape
    shape = tuple([_get_dim(d) for d in shape.dim])
    return TensorSpec(value_info.name, dtype=dtype, shape=shape)
|
||||
|
||||
|
||||
def _infer_graph_precision(onnx_graph: onnx.GraphProto) -> Optional[Precision]:
    """Infer the precision of an ONNX graph from the dtypes of its values.

    The ONNX graph is mirrored as a networkx directed graph whose nodes carry
    a ``dtype`` attribute; ``infer_precision`` then picks the dtype, which is
    mapped to a Precision.

    Returns:
        Precision.FP32 or Precision.FP16, or None when no dtype was inferred.
    """
    import networkx as nx

    # build directed graph
    nx_graph = nx.DiGraph()

    def _np_dtype_of(value_info):
        type_proto = value_info.type
        if not hasattr(type_proto, "tensor_type"):
            raise NotImplementedError("Not implemented yet")
        return TENSOR_TYPE_TO_NP_TYPE[type_proto.tensor_type.elem_type]

    node_output2type = {vi.name: _np_dtype_of(vi) for vi in onnx_graph.value_info}

    # Lookup tables from tensor name to the node producing / consuming it.
    # A name appearing several times keeps the last node seen.
    node_outputs2node = {}
    for graph_node in onnx_graph.node:
        for produced_name in graph_node.output:
            node_outputs2node[produced_name] = graph_node
    node_inputs2node = {}
    for graph_node in onnx_graph.node:
        for consumed_name in graph_node.input:
            node_inputs2node[consumed_name] = graph_node

    for graph_node in onnx_graph.node:
        # The dtype lookup key is all output names of the node joined with "+".
        nx_graph.add_node(
            graph_node.name,
            op=graph_node.op_type,
            attr={a.name: a for a in graph_node.attribute},
            dtype=node_output2type.get("+".join(graph_node.output), None),
        )
        for consumed_name in graph_node.input:
            producer = node_outputs2node.get(consumed_name, None)
            if producer:
                nx_graph.add_edge(producer.name, graph_node.name)

    for graph_input in onnx_graph.input:
        nx_graph.add_node(graph_input.name, op="input", dtype=_np_dtype_of(graph_input))
        consumer = node_inputs2node.get(graph_input.name, None)
        if consumer:
            nx_graph.add_edge(graph_input.name, consumer.name)

    for graph_output in onnx_graph.output:
        output_name = graph_output.name
        nx_graph.add_node(output_name, op="output", dtype=_np_dtype_of(graph_output))
        producer = node_outputs2node.get(output_name, None)
        if producer:
            nx_graph.add_edge(producer.name, output_name)
        else:
            LOGGER.warning(f"Could not find previous node for {output_name}")

    input_names = [n.name for n in onnx_graph.input]
    output_names = [n.name for n in onnx_graph.output]
    most_common_dtype = infer_precision(nx_graph, input_names, output_names, lambda node: node.get("dtype", None))
    if most_common_dtype is None:
        return None
    dtype2precision = {np.dtype("float32"): Precision.FP32, np.dtype("float16"): Precision.FP16}
    return dtype2precision[most_common_dtype]
|
||||
|
||||
|
||||
class OnnxLoader(BaseLoader):
    """Loads an ONNX file into a Model together with its input/output specs and precision."""

    def load(self, model_path: Union[str, Path], **_) -> Model:
        """Read, check and shape-infer the ONNX model stored at ``model_path``.

        Extra keyword arguments are accepted and ignored.
        """
        path = model_path.as_posix() if isinstance(model_path, Path) else model_path

        onnx_model = onnx.load(path)
        onnx.checker.check_model(onnx_model)
        onnx.helper.strip_doc_string(onnx_model)
        onnx_model = onnx.shape_inference.infer_shapes(onnx_model)

        # TODO: probably modification of onnx model ios causes error on optimize
        # (onnx.utils.polish_model, which would run checker, docs strip,
        # optimizer and shape inference in one call, is therefore not used)

        graph = onnx_model.graph
        inputs = {}
        for value_info in graph.input:
            inputs[value_info.name] = _value_info2tensor_spec(value_info)
        outputs = {}
        for value_info in graph.output:
            outputs[value_info.name] = _value_info2tensor_spec(value_info)

        return Model(onnx_model, _infer_graph_precision(graph), inputs, outputs)
|
||||
|
||||
|
||||
class OnnxSaver(BaseSaver):
    """Writes an ONNX model to disk, either serialized (default) or as protobuf text."""

    def __init__(self, as_text: bool = False):
        self._as_text = as_text

    def save(self, model: Model, model_path: Union[str, Path]) -> None:
        """Store ``model.handle`` at ``model_path``, creating parent directories as needed."""
        destination = Path(model_path)
        LOGGER.debug(f"Saving ONNX model to {destination.as_posix()}")
        destination.parent.mkdir(parents=True, exist_ok=True)

        onnx_model: onnx.ModelProto = model.handle
        if not self._as_text:
            with destination.open("wb") as binary_file:
                binary_file.write(onnx_model.SerializeToString())
        else:
            with destination.open("w") as text_file:
                text_file.write(text_format.MessageToString(onnx_model))
|
||||
|
||||
|
||||
"""
|
||||
ExecutionProviders on onnxruntime 1.4.0
|
||||
['TensorrtExecutionProvider',
|
||||
'CUDAExecutionProvider',
|
||||
'MIGraphXExecutionProvider',
|
||||
'NGRAPHExecutionProvider',
|
||||
'OpenVINOExecutionProvider',
|
||||
'DnnlExecutionProvider',
|
||||
'NupharExecutionProvider',
|
||||
'VitisAIExecutionProvider',
|
||||
'ArmNNExecutionProvider',
|
||||
'ACLExecutionProvider',
|
||||
'CPUExecutionProvider']
|
||||
"""
|
||||
|
||||
|
||||
def _check_providers(providers):
    """Normalize ``providers`` to a sequence and verify that onnxruntime offers all of them.

    A falsy value becomes an empty list and a single value is wrapped in a list.

    Raises:
        RuntimeError: when any requested provider is not available.
    """
    if not providers:
        requested = []
    elif isinstance(providers, (list, tuple)):
        requested = providers
    else:
        requested = [providers]

    unavailable = set(requested) - set(onnxruntime.get_available_providers())
    if unavailable:
        raise RuntimeError(f"Unavailable providers {unavailable}")
    return requested
|
||||
|
||||
|
||||
class OnnxRunner(BaseRunner):
    """Factory of onnxruntime inference sessions for ONNX models."""

    def __init__(self, verbose_runtime_logs: bool = False):
        self._verbose_runtime_logs = verbose_runtime_logs
        # No explicit execution providers are requested.
        self._providers = None

    def init_inference(self, model: Model):
        """Return a not-yet-entered OnnxRunnerSession for ``model``."""
        assert isinstance(model.handle, onnx.ModelProto)
        session_kwargs = {
            "model": model,
            "providers": self._providers,
            "verbose_runtime_logs": self._verbose_runtime_logs,
        }
        return OnnxRunnerSession(**session_kwargs)
|
||||
|
||||
|
||||
class OnnxRunnerSession(BaseRunnerSession):
    """Context manager running an ONNX model through onnxruntime.

    Entering the context applies the session's environment variables and
    creates the onnxruntime session; leaving it drops the session and
    restores the previous environment values.
    """

    def __init__(self, model: Model, providers, verbose_runtime_logs: bool = False):
        super().__init__(model)
        self._input_names = None
        self._output_names = None
        self._session = None
        self._providers = providers
        self._verbose_runtime_logs = verbose_runtime_logs
        self._old_env_values = {}

    def __enter__(self):
        self._old_env_values = self._set_env_variables()
        try:
            sess_options = onnxruntime.SessionOptions()  # default session options
            if self._verbose_runtime_logs:
                sess_options.log_severity_level = 0
                sess_options.log_verbosity_level = 1
            LOGGER.info(
                f"Starting inference session for onnx model providers={self._providers} sess_options={sess_options}"
            )

            self._input_names = list(self._model.inputs)
            self._output_names = list(self._model.outputs)

            model_payload = self._model.handle.SerializeToString()
            self._session = onnxruntime.InferenceSession(
                model_payload, providers=self._providers, sess_options=sess_options
            )
        except Exception:
            # __exit__ is not invoked when __enter__ raises, so the state set
            # above has to be rolled back here before propagating the error.
            self._input_names = None
            self._output_names = None
            self._session = None
            self._recover_env_variables(self._old_env_values)
            raise
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._input_names = None
        self._output_names = None
        self._session = None
        self._recover_env_variables(self._old_env_values)

    def __call__(self, x: Dict[str, object]):
        """Run inference on ``x`` (input name -> value) and return output name -> value."""
        # Only the model's declared inputs are fed; extra keys in x are ignored.
        feed_dict = {k: x[k] for k in self._input_names}
        y_pred = self._session.run(self._output_names, feed_dict)
        y_pred = dict(zip(self._output_names, y_pred))

        return y_pred
|
||||
|
||||
|
||||
# Register the ONNX loader, runner and saver under the ONNX format value.
loaders.register_extension(Format.ONNX.value, OnnxLoader)
runners.register_extension(Format.ONNX.value, OnnxRunner)
savers.register_extension(Format.ONNX.value, OnnxSaver)
|
|
@ -0,0 +1,100 @@
|
|||
import logging
|
||||
from typing import Dict, Iterable, Optional
|
||||
|
||||
# pytype: disable=import-error
|
||||
import onnx
|
||||
import tensorrt as trt
|
||||
|
||||
from ..core import BaseConverter, Format, Model, Precision, ShapeSpec
|
||||
from ..extensions import converters
|
||||
from .utils import get_input_shapes
|
||||
|
||||
# pytype: enable=import-error
|
||||
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
|
||||
|
||||
|
||||
class Onnx2TRTConverter(BaseConverter):
    """Converts an ONNX model into a TensorRT engine."""

    def __init__(self, *, max_batch_size: int, max_workspace_size: int, precision: str):
        self._precision = Precision(precision)
        self._max_workspace_size = max_workspace_size
        self._max_batch_size = max_batch_size

    def convert(self, model: Model, dataloader_fn) -> Model:
        """Build a TensorRT engine for ``model`` and return the model with its handle replaced.

        Input shapes are derived from the data produced by calling ``dataloader_fn``.
        """
        build_kwargs = {
            "shapes": get_input_shapes(dataloader_fn(), self._max_batch_size),
            "max_workspace_size": self._max_workspace_size,
            "max_batch_size": self._max_batch_size,
            "model_precision": self._precision.value,
        }
        cuda_engine = onnx2trt(model.handle, **build_kwargs)
        return model._replace(handle=cuda_engine)

    @staticmethod
    def required_source_model_precision(requested_model_precision: Precision) -> Precision:
        """Return the precision the source model must have, whatever precision was requested."""
        # TensorRT requires source models to be in FP32 precision
        return Precision.FP32
|
||||
|
||||
|
||||
def onnx2trt(
    onnx_model: onnx.ModelProto,
    *,
    shapes: Dict[str, ShapeSpec],
    max_workspace_size: int,
    max_batch_size: int,
    model_precision: str,
) -> "trt.ICudaEngine":
    """
    Converts onnx model to TensorRT ICudaEngine
    Args:
        onnx_model: onnx.Model to convert
        shapes: dictionary containing min shape, max shape, opt shape for each input name
        max_workspace_size: The maximum GPU temporary memory which the CudaEngine can use at execution time.
        max_batch_size: The maximum batch size which can be used at execution time,
                        and also the batch size for which the CudaEngine will be optimized.
        model_precision: precision of kernels (possible values: fp16, fp32)

    Returns: TensorRT ICudaEngine

    Raises:
        RuntimeError: when the ONNX model cannot be parsed or the engine cannot be built.
    """
    # Whether or not 16-bit kernels are permitted.
    # During :class:`ICudaEngine` build fp16 kernels will also be tried when this mode is enabled.
    fp16_mode = "16" in model_precision

    builder = trt.Builder(TRT_LOGGER)
    builder.fp16_mode = fp16_mode
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = max_workspace_size

    # In TensorRT 7.0, the ONNX parser only supports full-dimensions mode,
    # meaning that your network definition must be created with the explicitBatch flag set.
    # For more information, see
    # https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work_dynamic_shapes
    flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(flags)

    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        # onnx model parsing
        if not parser.parse(onnx_model.SerializeToString()):
            for i in range(parser.num_errors):
                LOGGER.error(f"OnnxParser error {i}/{parser.num_errors}: {parser.get_error(i)}")
            raise RuntimeError("Error during parsing ONNX model (see logs for details)")

        # optimization
        config = builder.create_builder_config()
        config.flags |= bool(fp16_mode) << int(trt.BuilderFlag.FP16)
        config.max_workspace_size = max_workspace_size

        # One optimization profile carrying the shape spec of every input.
        profile = builder.create_optimization_profile()
        for name, spec in shapes.items():
            profile.set_shape(name, **spec._asdict())

        config.add_optimization_profile(profile)
        engine = builder.build_engine(network, config=config)

    # Fail here with a clear message rather than handing None to the caller,
    # where it would only surface later as an AttributeError.
    if engine is None:
        raise RuntimeError("Could not build TensorRT ICudaEngine from ONNX model (see logs for details)")

    return engine
|
||||
|
||||
|
||||
converters.register_extension(f"{Format.ONNX.value}--{Format.TRT.value}", Onnx2TRTConverter)
|
|
@ -0,0 +1,202 @@
|
|||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, NamedTuple, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
# pytype: disable=import-error
|
||||
try:
|
||||
import pycuda.autoinit
|
||||
import pycuda.driver as cuda
|
||||
except (ImportError, Exception) as e:
|
||||
logging.getLogger(__name__).debug(f"Problems with importing pycuda package; {e}")
|
||||
# pytype: enable=import-error
|
||||
|
||||
import tensorrt as trt # pytype: disable=import-error
|
||||
|
||||
from ..core import BaseLoader, BaseRunner, BaseRunnerSession, BaseSaver, Format, Model, Precision, TensorSpec
|
||||
from ..extensions import loaders, runners, savers
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
|
||||
|
||||
"""
|
||||
documentation:
|
||||
https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html
|
||||
https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#python_samples_section
|
||||
"""
|
||||
|
||||
|
||||
class TensorRTLoader(BaseLoader):
    """Loads a serialized TensorRT engine and describes its bindings as tensor specs."""

    def load(self, model_path: Union[str, Path], **_) -> Model:
        """Deserialize the engine stored at ``model_path``.

        Extra keyword arguments are accepted and ignored.

        Raises:
            RuntimeError: when the engine cannot be deserialized.
        """
        engine_path = Path(model_path)
        LOGGER.debug(f"Loading TensorRT engine from {engine_path}")

        with engine_path.open("rb") as fh, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(fh.read())

        if engine is None:
            raise RuntimeError(f"Could not load ICudaEngine from {engine_path}")

        # Every binding of the engine is either an input or an output.
        inputs, outputs = {}, {}
        for binding_idx in range(engine.num_bindings):
            name = engine.get_binding_name(binding_idx)
            is_input = engine.binding_is_input(binding_idx)
            dtype = engine.get_binding_dtype(binding_idx)
            shape = engine.get_binding_shape(binding_idx)
            specs = inputs if is_input else outputs
            specs[name] = TensorSpec(name, dtype, shape)

        # The precision of a loaded engine is not determined here.
        return Model(engine, None, inputs, outputs)
|
||||
|
||||
|
||||
class TensorRTSaver(BaseSaver):
    """Serializes a TensorRT engine to a file."""

    def __init__(self):
        pass

    def save(self, model: Model, model_path: Union[str, Path]) -> None:
        """Write the serialized ``model.handle`` to ``model_path``, creating parent directories as needed."""
        destination = Path(model_path)
        LOGGER.debug(f"Saving TensorRT engine to {destination.as_posix()}")
        destination.parent.mkdir(parents=True, exist_ok=True)

        cuda_engine: "trt.ICudaEngine" = model.handle
        with destination.open("wb") as engine_file:
            engine_file.write(cuda_engine.serialize())
|
||||
|
||||
|
||||
class TRTBuffers(NamedTuple):
    # Host and device buffers of one inference call; every dict is keyed by tensor name.

    # Input data on the host; stored as None and filled in per call via _replace.
    x_host: Optional[Dict[str, object]]
    # Device allocations for the inputs.
    x_dev: Dict[str, object]
    # Host arrays receiving the outputs.
    y_pred_host: Dict[str, object]
    # Device allocations for the outputs.
    y_pred_dev: Dict[str, object]
|
||||
|
||||
|
||||
class TensorRTRunner(BaseRunner):
    """Factory of TensorRT inference sessions."""

    def __init__(self):
        pass

    def init_inference(self, model: Model):
        """Return a not-yet-entered TensorRTRunnerSession for ``model``."""
        session = TensorRTRunnerSession(model=model)
        return session
|
||||
|
||||
|
||||
class TensorRTRunnerSession(BaseRunnerSession):
    """Context manager running a TensorRT engine with pycuda-managed buffers.

    Entering the context creates an execution context and collects the names
    of the engine's input and output bindings. Calling the session copies the
    inputs to the device, executes the engine and copies the outputs back.
    """

    def __init__(self, model: Model):
        super().__init__(model)
        assert isinstance(model.handle, trt.ICudaEngine)
        self._model = model
        self._has_dynamic_shapes = None

        self._context = None
        self._engine: trt.ICudaEngine = self._model.handle
        self._cuda_context = pycuda.autoinit.context

        self._input_names = None
        self._output_names = None
        self._buffers = None

    def __enter__(self):
        self._context = self._engine.create_execution_context()
        self._context.__enter__()

        self._input_names = [
            self._engine[idx] for idx in range(self._engine.num_bindings) if self._engine.binding_is_input(idx)
        ]
        self._output_names = [
            self._engine[idx] for idx in range(self._engine.num_bindings) if not self._engine.binding_is_input(idx)
        ]
        # all_binding_shapes_specified is True for models without dynamic shapes
        # so initially this variable is False for models with dynamic shapes
        self._has_dynamic_shapes = not self._context.all_binding_shapes_specified

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._context.__exit__(exc_type, exc_value, traceback)
        self._input_names = None
        self._output_names = None

        # TODO: are cuda buffers dealloc automatically?
        self._buffers = None

    def __call__(self, x):
        """Run inference on ``x`` (input name -> host array) and return output name -> host array."""
        buffers = self._prepare_buffers_if_needed(x)
        bindings = self._update_bindings(buffers)

        for name in self._input_names:
            cuda.memcpy_htod(buffers.x_dev[name], buffers.x_host[name])
        self._cuda_context.push()
        try:
            self._context.execute_v2(bindings=bindings)
        finally:
            # Always balance the push above, even when execution raises, so
            # the context does not stay on the stack.
            self._cuda_context.pop()
        for name in self._output_names:
            cuda.memcpy_dtoh(buffers.y_pred_host[name], buffers.y_pred_dev[name])

        return buffers.y_pred_host

    def _update_bindings(self, buffers: TRTBuffers):
        """Return the device buffers ordered by the engine's binding indices."""
        bindings = [None] * self._engine.num_bindings
        for name in buffers.y_pred_dev:
            binding_idx: int = self._engine[name]
            bindings[binding_idx] = buffers.y_pred_dev[name]

        for name in buffers.x_dev:
            binding_idx: int = self._engine[name]
            bindings[binding_idx] = buffers.x_dev[name]

        return bindings

    def _set_dynamic_input_shapes(self, x_host):
        """Set the shape of every dynamic input on the execution context from the data in ``x_host``."""

        def _is_shape_dynamic(input_shape):
            return any([dim is None or dim == -1 for dim in input_shape])

        for name in self._input_names:
            bindings_idx = self._engine[name]
            data_shape = x_host[name].shape  # pytype: disable=attribute-error
            if self._engine.is_shape_binding(bindings_idx):
                input_shape = self._context.get_shape(bindings_idx)
                if _is_shape_dynamic(input_shape):
                    self._context.set_shape_input(bindings_idx, data_shape)
            else:
                input_shape = self._engine.get_binding_shape(bindings_idx)
                if _is_shape_dynamic(input_shape):
                    self._context.set_binding_shape(bindings_idx, data_shape)

        assert self._context.all_binding_shapes_specified and self._context.all_shape_inputs_specified

    def _prepare_buffers_if_needed(self, x_host: Dict[str, object]):
        """Return buffers for ``x_host``, reallocating them when shapes may have changed.

        Buffers are rebuilt on every call for engines with dynamic shapes, and
        otherwise whenever the batch size (first dimension of the first input)
        differs from that of the current output buffers.
        """
        # pytype: disable=attribute-error
        new_batch_size = list(x_host.values())[0].shape[0]
        current_batch_size = list(self._buffers.y_pred_host.values())[0].shape[0] if self._buffers else 0
        # pytype: enable=attribute-error

        if self._has_dynamic_shapes or new_batch_size != current_batch_size:
            # TODO: are CUDA buffers dealloc automatically?

            self._set_dynamic_input_shapes(x_host)

            y_pred_host = {}
            for name in self._output_names:
                shape = self._context.get_binding_shape(self._engine[name])
                y_pred_host[name] = np.zeros(shape, dtype=trt.nptype(self._model.outputs[name].dtype))

            y_pred_dev = {name: cuda.mem_alloc(data.nbytes) for name, data in y_pred_host.items()}

            x_dev = {
                name: cuda.mem_alloc(host_input.nbytes)
                for name, host_input in x_host.items()
                if name in self._input_names  # pytype: disable=attribute-error
            }

            self._buffers = TRTBuffers(None, x_dev, y_pred_host, y_pred_dev)

        return self._buffers._replace(x_host=x_host)
|
||||
|
||||
|
||||
# The TensorRT loader, runner and saver are registered only when
# pycuda.driver has been imported into this process; otherwise the
# omission is logged at debug level.
if "pycuda.driver" in sys.modules:
    loaders.register_extension(Format.TRT.value, TensorRTLoader)
    runners.register_extension(Format.TRT.value, TensorRTRunner)
    savers.register_extension(Format.TRT.value, TensorRTSaver)
else:
    LOGGER.debug("Do not register TensorRT extension due problems with importing pycuda.driver package.")
|
|
@ -0,0 +1,535 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
# pytype: disable=import-error
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.eager import wrap_function
|
||||
from tf2onnx.shape_inference import infer_shape
|
||||
from tf2onnx.tf_loader import (
|
||||
freeze_session,
|
||||
from_function,
|
||||
inputs_without_resource,
|
||||
is_function,
|
||||
remove_redundant_inputs,
|
||||
tf_optimize,
|
||||
)
|
||||
|
||||
# pytype: enable=import-error
|
||||
|
||||
from ..args import filter_fn_args
|
||||
from ..core import (
|
||||
GET_MODEL_FN_NAME,
|
||||
GET_SERVING_INPUT_RECEIVER_FN,
|
||||
BaseConverter,
|
||||
BaseLoader,
|
||||
BaseRunner,
|
||||
BaseRunnerSession,
|
||||
BaseSaver,
|
||||
Format,
|
||||
Model,
|
||||
Precision,
|
||||
TensorSpec,
|
||||
load_from_file,
|
||||
)
|
||||
from ..extensions import converters, loaders, runners, savers
|
||||
from .utils import infer_precision
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_tf2():
    """Return True when the imported TensorFlow version string begins with "2."."""
    version_prefix = tf.__version__[:2]
    return version_prefix == "2."
|
||||
|
||||
|
||||
def create_session_config(*, allow_growth=False, use_xla=False, gpu_memory_fraction=1.0):
    """Build a ``tf.compat.v1.ConfigProto`` for sessions created by this module.

    Args:
        allow_growth: forwarded to ``GPUOptions(allow_growth=...)``.
        use_xla: when true, sets the graph optimizer ``global_jit_level`` to ``ON_1``.
        gpu_memory_fraction: forwarded to
            ``GPUOptions(per_process_gpu_memory_fraction=...)``.

    Returns:
        The configured ``ConfigProto``.
    """
    gpu_options = tf.compat.v1.GPUOptions(
        per_process_gpu_memory_fraction=gpu_memory_fraction, allow_growth=allow_growth
    )
    config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
    if use_xla:
        # Go through the compat.v1 namespace like the rest of this module:
        # the top-level ``tf.OptimizerOptions`` alias is not exposed by TF 2.x.
        config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1

    LOGGER.debug(
        f"Using gpu memory fraction: allow_growth={allow_growth} "
        f"gpu_memory_fraction={gpu_memory_fraction} "
        f"use_xla={use_xla}"
    )
    return config
|
||||
|
||||
|
||||
class TFTRTConverter(BaseConverter):
    """Converter that passes a TensorFlow GraphDef through ``TrtGraphConverter``.

    Every constructor argument is keyword-only and is handed to
    ``trt_convert.TrtGraphConverter`` when ``convert`` is called.
    """

    def __init__(
        self,
        *,
        is_dynamic_op: bool = False,
        minimum_segment_size: int = 3,
        max_batch_size: int = 1,
        max_workspace_size: int = (4 << 30) - 1000,  # ~3.999GB
        maximum_cached_engines: int = 1000,
        precision: str,
    ):
        # Precision() validates the string against the known precision values.
        self._precision = Precision(precision)
        self._maximum_cached_engines = maximum_cached_engines
        self._max_workspace_size = max_workspace_size
        self._max_batch_size = max_batch_size
        self._minimum_segment_size = minimum_segment_size
        self._is_dynamic_op = is_dynamic_op

    def convert(self, model: Model, dataloader_fn) -> Model:
        """Return a copy of ``model`` whose handle is the TF-TRT converted graph.

        ``dataloader_fn`` is accepted for interface compatibility and is not used.
        """
        # https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html
        # converting graph_def is not supported in TF2
        from tensorflow.python.compiler.tensorrt import trt_convert  # pytype: disable=import-error

        assert isinstance(model.handle, tf.compat.v1.GraphDef)

        # Output tensor names look like "<node>:<index>"; only the node part is
        # passed as the blacklist.
        protected_nodes = []
        for output_spec in model.outputs.values():
            protected_nodes.append(output_spec.name.split(":")[0])

        trt_converter = trt_convert.TrtGraphConverter(
            input_graph_def=model.handle,
            session_config=create_session_config(allow_growth=True),
            nodes_blacklist=protected_nodes,
            is_dynamic_op=self._is_dynamic_op,
            precision_mode=self._precision.value,
            max_workspace_size_bytes=self._max_workspace_size,
            maximum_cached_engines=self._maximum_cached_engines,
            max_batch_size=self._max_batch_size,
            minimum_segment_size=self._minimum_segment_size,
        )
        converted_graph_def = trt_converter.convert()
        return model._replace(handle=converted_graph_def)

    @staticmethod
    def required_source_model_precision(requested_model_precision: Precision) -> Precision:
        """Return the precision the source model must have (always FP32)."""
        # TensorRT requires source models to be in FP32 precision
        return Precision.FP32
|
||||
|
||||
|
||||
def _from_saved_model_v1(sess, model_path, tag, signatures):
    """
    Load a TF1 SavedModel into ``sess`` and freeze it.

    NOTICE: Modified version from tf2onnx project

    ``signatures`` is extended in place with every signature key of the loaded
    model that does not start with ``_``. Returns a tuple
    ``(frozen_graph, inputs, outputs)`` where ``inputs``/``outputs`` map the
    signature argument names to tensor names.
    """
    if tag is None:
        tag = [tf.saved_model.SERVING]
        LOGGER.warning("'--tag' not specified for saved_model. Using --tag serve")

    if tag == "":
        tag = [[]]
        LOGGER.warning("'--tag' value is empty string. Using tag =[[]]")

    if not isinstance(tag, list):
        tag = [tag]

    imported = tf.compat.v1.saved_model.loader.load(sess, tag, model_path)

    # consider signatures starting with '_' private
    signatures.extend(key for key in imported.signature_def.keys() if not key.startswith("_"))

    try:
        from tensorflow.contrib.saved_model.python.saved_model import (  # pytype: disable=import-error
            signature_def_utils,
        )
    except ImportError:
        # TF1.12 changed the api
        def get_signature_def(meta_graph_def, key):
            return meta_graph_def.signature_def[key]

    else:

        def get_signature_def(meta_graph_def, key):
            return signature_def_utils.get_signature_def_by_key(meta_graph_def, key)

    inputs = {}
    outputs = {}
    for key in signatures:
        signature = get_signature_def(imported, key)
        inputs.update({name: tensor_info.name for name, tensor_info in signature.inputs.items()})
        outputs.update({name: tensor_info.name for name, tensor_info in signature.outputs.items()})

    frozen_graph = freeze_session(
        sess,
        input_names=list(inputs.values()),
        output_names=list(outputs.values()),
    )
    return frozen_graph, inputs, outputs
|
||||
|
||||
|
||||
def _infer_model_precision(
    tf_graph: tf.compat.v1.GraphDef, inputs_dict: Dict[str, TensorSpec], outputs_dict: Dict[str, TensorSpec]
) -> Optional[Precision]:
    """Infer the model precision from the dtypes of the nodes of ``tf_graph``.

    The graph is mirrored into a ``networkx.DiGraph`` whose nodes carry a
    ``dtype`` attribute, and ``infer_precision`` picks the dtype. Returns
    ``Precision.FP32`` / ``Precision.FP16`` for float32 / float16, ``None``
    when ``infer_precision`` returns ``None``; any other dtype raises
    ``KeyError``.
    """
    import networkx as nx

    def _get_dtype(node_def):
        # Prefer the "T" attribute, fall back to "dtype".
        attr_value = node_def.attr.get("T", None) or node_def.attr.get("dtype", None)
        if not attr_value:
            return attr_value

        listed_types = attr_value.list.type
        if listed_types:
            # A list-valued attribute is only supported when homogeneous.
            assert len(set(listed_types)) == 1
            tf_dtype = tf.dtypes.DType(listed_types[0])
        else:
            tf_dtype = tf.dtypes.DType(attr_value.type)

        if tf_dtype and tf_dtype.is_numpy_compatible:
            return np.dtype(tf_dtype.as_numpy_dtype())
        return tf_dtype

    # build directed graph
    nx_graph = nx.DiGraph()
    for node_def in tf_graph.node:
        extra_attrs = {key: value for key, value in node_def.attr.items() if key not in ["value", "dtype"]}
        nx_graph.add_node(node_def.name, op=node_def.op, **extra_attrs, dtype=_get_dtype(node_def))
        for source_name in node_def.input:
            nx_graph.add_edge(source_name, node_def.name)

    # Tensor names look like "<node>:<index>"; graph nodes are keyed by node name.
    input_names = [spec.name.split(":")[0] for spec in inputs_dict.values()]
    output_names = [spec.name.split(":")[0] for spec in outputs_dict.values()]
    most_common_dtype = infer_precision(nx_graph, input_names, output_names, _get_dtype)
    if most_common_dtype is None:
        return None

    dtype_to_precision = {np.dtype("float32"): Precision.FP32, np.dtype("float16"): Precision.FP16}
    return dtype_to_precision[most_common_dtype]
|
||||
|
||||
|
||||
class TFEstimatorLoader(BaseLoader):
    """Loads a TensorFlow Estimator defined in a Python source file as a frozen graph.

    The source file must expose the functions named by ``GET_MODEL_FN_NAME``
    and ``GET_SERVING_INPUT_RECEIVER_FN``.
    """

    required_fn_name_for_signature_parsing: Optional[str] = GET_MODEL_FN_NAME

    def __init__(self, **kwargs):
        # Arguments are filtered per target function at load time.
        self._model_args = kwargs

    def load(self, model_path: Union[str, Path], **_) -> Model:
        """Build the estimator graph in PREDICT mode, restore the latest checkpoint and freeze it."""
        if isinstance(model_path, Path):
            model_path = model_path.as_posix()

        get_model = load_from_file(model_path, "model", GET_MODEL_FN_NAME)
        get_serving_input_receiver_fn = load_from_file(model_path, "model", GET_SERVING_INPUT_RECEIVER_FN)

        if get_model is None:
            raise RuntimeError(f"Could not find {GET_MODEL_FN_NAME} in {model_path}")
        if get_serving_input_receiver_fn is None:
            raise RuntimeError(f"Could not find {GET_SERVING_INPUT_RECEIVER_FN} in {model_path}")

        # Each factory only receives the keyword arguments it accepts.
        estimator_kwargs = filter_fn_args(self._model_args, fn=get_model)
        receiver_kwargs = filter_fn_args(self._model_args, fn=get_serving_input_receiver_fn)

        session_config = create_session_config(allow_growth=True)
        tf.compat.v1.reset_default_graph()
        with tf.compat.v1.Session(config=session_config) as sess:
            estimator = get_model(**estimator_kwargs)
            serving_input_receiver_fn = get_serving_input_receiver_fn(**receiver_kwargs)

            input_receiver = serving_input_receiver_fn()
            estimator_spec = estimator.model_fn(
                features=input_receiver.features,
                labels=None,
                mode=tf.estimator.ModeKeys.PREDICT,
                config=estimator.config,
            )

            inputs_dict = {
                key: tensor2tensor_spec(tensor) for key, tensor in input_receiver.receiver_tensors.items()
            }
            outputs_dict = {key: tensor2tensor_spec(tensor) for key, tensor in estimator_spec.predictions.items()}

            input_tensor_names = [spec.name for spec in inputs_dict.values()]
            output_tensor_names = [spec.name for spec in outputs_dict.values()]

            # Fall back to a sharded Saver when the scaffold does not provide one.
            graph_saver = estimator_spec.scaffold.saver or tf.compat.v1.train.Saver(sharded=True)
            graph_saver.restore(sess, estimator.latest_checkpoint())

            input_tensor_names = inputs_without_resource(sess, input_tensor_names)
            frozen_graph = freeze_session(sess, input_names=input_tensor_names, output_names=output_tensor_names)
            input_tensor_names = remove_redundant_inputs(frozen_graph, input_tensor_names)

        tf.compat.v1.reset_default_graph()
        with tf.compat.v1.Session(config=estimator.config.session_config):
            frozen_graph = tf_optimize(input_tensor_names, output_tensor_names, frozen_graph)
        tf.compat.v1.reset_default_graph()

        return Model(
            frozen_graph,
            _infer_model_precision(frozen_graph, inputs_dict, outputs_dict),
            inputs_dict,
            outputs_dict,
        )
|
||||
|
||||
|
||||
class TFKerasLoader(BaseLoader):
    """
    Loads a Keras model from source code.

    The large-model flag helps with loading models which exceed the maximum protobuf size of 2GB.
    It is disabled by default.

    The tf-allow-growth flag enables the GPU memory growth feature
    (https://www.tensorflow.org/guide/gpu#limiting_gpu_memory_growth). It is disabled by default.
    """

    required_fn_name_for_signature_parsing: Optional[str] = GET_MODEL_FN_NAME

    def __init__(self, large_model: bool = False, tf_allow_growth: bool = False, **kwargs):
        self._model_args = kwargs
        self._allow_growth = tf_allow_growth
        self._large_model = large_model

    def load(self, model_path: Union[str, Path], **_) -> Model:
        """Call the ``get_model`` function of the source file and convert its result to a graph.

        ``get_model`` is expected to return a pair ``(eager_model, call_fn)``.
        """
        if isinstance(model_path, Path):
            model_path = model_path.as_posix()

        get_model = load_from_file(model_path, "model", GET_MODEL_FN_NAME)
        if get_model is None:
            raise RuntimeError(f"Could not find {GET_MODEL_FN_NAME} in {model_path}")

        model_args = filter_fn_args(self._model_args, fn=get_model)

        if self._allow_growth:
            for gpu in tf.config.experimental.list_physical_devices("GPU"):
                tf.config.experimental.set_memory_growth(gpu, True)

        tf.keras.backend.clear_session()
        tf.keras.backend.set_learning_phase(False)

        eager_model, call_fn = get_model(**model_args)

        inputs_dict: Dict[str, TensorSpec] = {}
        for input_name, tensor in zip(eager_model.input_names, eager_model.inputs):
            inputs_dict[input_name] = TensorSpec(tensor.name, tensor.dtype.name, tuple(tensor.shape.as_list()))

        signature = [
            tf.TensorSpec(shape=spec.shape, dtype=spec.dtype, name=name) for name, spec in inputs_dict.items()
        ]
        concrete_func = call_fn.get_concrete_function(*signature)

        # Resource-typed inputs are excluded from the list of input tensor names.
        input_tensors_names = [tensor.name for tensor in concrete_func.inputs if tensor.dtype != tf.dtypes.resource]
        output_tensors_names = [tensor.name for tensor in concrete_func.outputs]

        graph_def = from_function(
            concrete_func, input_tensors_names, output_tensors_names, large_model=self._large_model
        )

        # tensor names changes after wrapping with call_fn, thus need to use those from concrete_func
        outputs_dict: Dict[str, TensorSpec] = {}
        for output_name, output_tensor_name, tensor in zip(
            eager_model.output_names, output_tensors_names, eager_model.outputs
        ):
            outputs_dict[output_name] = TensorSpec(
                output_tensor_name, tensor.dtype.name, tuple(tensor.shape.as_list())
            )

        precision = _infer_model_precision(graph_def, inputs_dict, outputs_dict)

        tf.keras.backend.clear_session()
        tf.keras.backend.set_learning_phase(False)

        return Model(graph_def, precision, inputs_dict, outputs_dict)
|
||||
|
||||
|
||||
class TFSavedModelLoader(BaseLoader):
    """Loads a TensorFlow SavedModel directory as a GraphDef-backed ``Model``."""

    def load(self, model_path: Union[str, Path], **kwargs) -> Model:
        """Load the SavedModel at ``model_path``; the code path depends on ``is_tf2()``."""
        if isinstance(model_path, Path):
            model_path = model_path.as_posix()
        tf.compat.v1.reset_default_graph()

        if not is_tf2():
            session_config = create_session_config(allow_growth=True)
            with tf.compat.v1.Session(config=session_config) as sess:
                graph_def, inputs, outputs = _from_saved_model_v1(sess, model_path, tag=None, signatures=[])
        else:
            from tf2onnx.tf_loader import _from_saved_model_v2  # pytype: disable=import-error

            graph_def, input_names, output_names, concrete_func, imported, initialized_tables = _from_saved_model_v2(
                model_path=model_path,
                input_names=None,
                output_names=None,
                tag=None,
                signature_def=[],
                concrete_function_index=None,
                large_model=False,
            )

            # inspired by https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/saved_model_cli.py#L205
            if concrete_func.structured_input_signature:
                input_args, input_kwargs = concrete_func.structured_input_signature
                input_names = list(input_kwargs)
                assert (
                    not input_args
                ), f"Not supported args in concrete function signature args={input_args}, kwargs={input_kwargs}"
            elif concrete_func._arg_keywords:  # pylint: disable=protected-access
                # For pure ConcreteFunctions we might have nothing better than _arg_keywords.
                assert concrete_func._num_positional_args in [0, 1]
                input_names = concrete_func._arg_keywords

            # Resource-typed tensors are skipped on both the input and output side.
            input_tensors = [tensor for tensor in concrete_func.inputs if tensor.dtype != tf.dtypes.resource]
            inputs = {}
            for name, tensor in zip(input_names, input_tensors):
                inputs[name] = tensor.name

            # they are already flattened
            output_tensors = [tensor for tensor in concrete_func.outputs if tensor.dtype != tf.dtypes.resource]
            output_names = sorted(concrete_func.structured_outputs)  # because outputs are in flatten form
            outputs = {}
            for name, tensor in zip(output_names, output_tensors):
                outputs[name] = tensor.name

        # Turn the name -> tensor-name mappings into name -> TensorSpec mappings.
        inputs, outputs = handle_tensor_specs(graph_def, inputs, outputs)

        return Model(graph_def, _infer_model_precision(graph_def, inputs, outputs), inputs, outputs)
|
||||
|
||||
|
||||
class TFRunner(BaseRunner):
    """Runner that hands out a TF1 or TF2 inference session depending on ``is_tf2()``."""

    def __init__(self):
        pass

    def init_inference(self, model: Model):
        """Return a new runner session for ``model``."""
        session_cls = TF2RunnerSession if is_tf2() else TF1RunnerSession
        return session_cls(model=model)
|
||||
|
||||
|
||||
class TF1RunnerSession(BaseRunnerSession):
    """Inference session that runs a GraphDef inside a ``tf.compat.v1.Session``."""

    def __init__(self, model: Model):
        super().__init__(model)

        assert isinstance(model.handle, tf.compat.v1.GraphDef)

        # Populated in __enter__ and cleared again in __exit__.
        self._session = None
        self._inputs = None
        self._outputs = None
        self._old_env_values = {}

    def __enter__(self):
        """Open the session, import the graph and look up the input/output tensors."""
        self._old_env_values = self._set_env_variables()

        tf.compat.v1.reset_default_graph()

        self._session = tf.compat.v1.Session(config=create_session_config(allow_growth=True))
        self._session.__enter__()

        tf.import_graph_def(self._model.handle, name="")

        graph = self._session.graph
        self._inputs = {}
        for name, spec in self._model.inputs.items():
            self._inputs[name] = graph.get_tensor_by_name(spec.name)
        self._outputs = {}
        for name, spec in self._model.outputs.items():
            self._outputs[name] = graph.get_tensor_by_name(spec.name)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the session, reset the default graph and restore the saved environment values."""
        self._session.__exit__(exc_type, exc_value, traceback)
        tf.compat.v1.reset_default_graph()
        self._inputs = self._outputs = self._session = None
        self._recover_env_variables(self._old_env_values)

    def __call__(self, x: Dict[str, object]):
        """Run the graph on ``x`` (a mapping keyed by model input name) and return the fetched outputs."""
        feed_dict = {}
        for name, placeholder in self._inputs.items():
            feed_dict[placeholder] = x[name]
        return self._session.run(self._outputs, feed_dict=feed_dict)
|
||||
|
||||
|
||||
class TF2RunnerSession(BaseRunnerSession):
    """Inference session that wraps a GraphDef into a callable concrete function."""

    def __init__(self, model: Model):
        super().__init__(model)
        assert isinstance(model.handle, tf.compat.v1.GraphDef)
        # Created in __enter__, dropped in __exit__.
        self._concrete_func = None

    def __enter__(self):
        """Wrap the model's GraphDef with ``wrap_function.function_from_graph_def``."""
        tf.compat.v1.reset_default_graph()
        model = self._model
        self._concrete_func = wrap_function.function_from_graph_def(
            model.handle,
            [spec.name for spec in model.inputs.values()],
            [spec.name for spec in model.outputs.values()],
        )
        # Overwrite the signature with specs named after the model inputs;
        # __call__ passes the inputs by keyword using those names.
        self._concrete_func._signature = [
            tf.TensorSpec(shape=spec.shape, dtype=spec.dtype, name=name) for name, spec in model.inputs.items()
        ]
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._concrete_func = None
        tf.compat.v1.reset_default_graph()

    def __call__(self, x: Dict[str, object]):
        """Convert ``x`` to tensors, call the function and return outputs keyed by output name."""
        tensors = tf.nest.map_structure(tf.convert_to_tensor, x)
        raw_outputs = self._concrete_func(**tensors)
        output_struct = {name: spec.name for name, spec in self._model.outputs.items()}
        numpy_outputs = tf.nest.map_structure(lambda t: t.numpy(), raw_outputs)
        return tf.nest.pack_sequence_as(output_struct, numpy_outputs)
|
||||
|
||||
|
||||
class TFSavedModelSaver(BaseSaver):
    """Saves a GraphDef-backed ``Model`` with ``tf.compat.v1.saved_model.simple_save``."""

    def save(self, model: Model, model_path: Union[str, Path]) -> None:
        """Import ``model.handle`` into a fresh session and export it to ``model_path``."""

        def _ensure_shape(tensors_dict, tensors_specs):
            # Tensors whose rank is unknown get the shape recorded in the model spec.
            for tensor_name, tensor in tensors_dict.items():
                if tensor.shape.rank is None:
                    tensor.set_shape(tensors_specs[tensor_name].shape)
            return tensors_dict

        if isinstance(model_path, Path):
            model_path = model_path.as_posix()

        session_config = create_session_config(allow_growth=True)
        with tf.compat.v1.Session(config=session_config) as sess:
            tf.import_graph_def(model.handle, name="")

            if not is_function(sess.graph):
                infer_shape(sess.graph, {})

            graph = sess.graph
            inputs = _ensure_shape(
                {name: graph.get_tensor_by_name(spec.name) for name, spec in model.inputs.items()},
                model.inputs,
            )
            outputs = _ensure_shape(
                {name: graph.get_tensor_by_name(spec.name) for name, spec in model.outputs.items()},
                model.outputs,
            )

            LOGGER.info(inputs)
            LOGGER.info(outputs)

            tf.compat.v1.saved_model.simple_save(sess, model_path, inputs, outputs, legacy_init_op=None)
|
||||
|
||||
|
||||
def handle_tensor_specs(
    graph_def, inputs: Dict[str, str], outputs: Dict[str, str]
) -> Tuple[Dict[str, TensorSpec], Dict[str, TensorSpec]]:
    """Resolve tensor names against ``graph_def`` and describe them as ``TensorSpec``.

    Args:
        graph_def: graph that is imported into a fresh default graph.
        inputs: mapping of input name to tensor name.
        outputs: mapping of output name to tensor name.

    Returns:
        Two dicts (inputs, outputs) with the same keys and ``TensorSpec`` values.
    """
    session_config = tf.compat.v1.ConfigProto(graph_options=tf.compat.v1.GraphOptions(infer_shapes=True))
    tf.compat.v1.reset_default_graph()
    with tf.compat.v1.Session(config=session_config) as sess:
        tf.import_graph_def(graph_def, name="")
        graph = sess.graph

        input_specs = {
            name: tensor2tensor_spec(graph.get_tensor_by_name(tensor_name)) for name, tensor_name in inputs.items()
        }
        output_specs = {
            name: tensor2tensor_spec(graph.get_tensor_by_name(tensor_name)) for name, tensor_name in outputs.items()
        }

    tf.compat.v1.reset_default_graph()
    return input_specs, output_specs
|
||||
|
||||
|
||||
def tensor2tensor_spec(tensor):
    """Describe ``tensor`` as a ``TensorSpec(name, dtype name, shape tuple)``.

    Shape entries that expose a ``value`` attribute are replaced by that
    value; other entries are kept as they are.
    """
    dims = tuple(getattr(dim, "value", dim) for dim in tensor.shape)
    return TensorSpec(tensor.name, tensor.dtype.name, dims)
|
||||
|
||||
|
||||
# Loaders: TF-TRT models are read with the SavedModel loader.
loaders.register_extension(Format.TF_ESTIMATOR.value, TFEstimatorLoader)
loaders.register_extension(Format.TF_KERAS.value, TFKerasLoader)
for _format in (Format.TF_SAVEDMODEL, Format.TF_TRT):
    loaders.register_extension(_format.value, TFSavedModelLoader)

# Converters: keys have the form "<source format>--<target format>".
# Conversions to SavedModel are registered with None as the converter.
for _target_format, _converter in ((Format.TF_SAVEDMODEL, None), (Format.TF_TRT, TFTRTConverter)):
    for _format in (Format.TF_ESTIMATOR, Format.TF_KERAS, Format.TF_SAVEDMODEL):
        converters.register_extension(f"{_format.value}--{_target_format.value}", _converter)

# Savers: both SavedModel and TF-TRT outputs are written by the SavedModel saver.
for _format in (Format.TF_SAVEDMODEL, Format.TF_TRT):
    savers.register_extension(_format.value, TFSavedModelSaver)

# Runners: a single runner class serves every TensorFlow format.
for _format in (Format.TF_ESTIMATOR, Format.TF_KERAS, Format.TF_SAVEDMODEL, Format.TF_TRT):
    runners.register_extension(_format.value, TFRunner)

# Keep the module namespace free of the loop helpers.
del _format, _target_format, _converter
|
|
@ -0,0 +1,89 @@
|
|||
from collections import Iterable
|
||||
|
||||
# pytype: disable=import-error
|
||||
import onnx
|
||||
import onnx.shape_inference
|
||||
import tensorflow as tf
|
||||
from tf2onnx import optimizer, tfonnx
|
||||
|
||||
# pytype: enable=import-error
|
||||
|
||||
from ..core import BaseConverter, Format, Model
|
||||
from ..extensions import converters
|
||||
from .tf import create_session_config
|
||||
|
||||
|
||||
def _replace_io_names(graph_proto, io_type, name2tensor):
|
||||
tensor2name = {v: k for k, v in name2tensor.items()}
|
||||
tensor_value_info_list = {"inputs": graph_proto.input, "outputs": graph_proto.output}[io_type]
|
||||
for tensor_value_info in tensor_value_info_list:
|
||||
old_name = tensor_value_info.name
|
||||
new_name = tensor2name.get(old_name)
|
||||
if new_name is not None and new_name != old_name:
|
||||
tensor_value_info.name = new_name
|
||||
# replace other graph nodes I/O
|
||||
for node in graph_proto.node:
|
||||
if old_name in node.input:
|
||||
idx = list(node.input).index(old_name)
|
||||
node.input[idx] = new_name
|
||||
if old_name in node.output:
|
||||
idx = list(node.output).index(old_name)
|
||||
node.output[idx] = new_name
|
||||
|
||||
|
||||
def tfgraph2onnx(graph_def, inputnames2tensornames, outputnames2tensornames, *, onnx_opset, onnx_optimized=True):
    """Convert a TensorFlow GraphDef into an ONNX model.

    Args:
        graph_def: graph to convert.
        inputnames2tensornames: mapping of input name to TF tensor name.
        outputnames2tensornames: mapping of output name to TF tensor name.
        onnx_opset: opset passed to ``tfonnx.process_tf_graph``.
        onnx_optimized: when true, the graph is run through ``optimizer.optimize_graph``.

    Returns:
        The ONNX model after I/O renaming, checking, doc-string stripping and
        shape inference.
    """
    with tf.Graph().as_default() as tf_graph:
        tf.import_graph_def(graph_def, name="")
        with tf.compat.v1.Session(graph=tf_graph, config=create_session_config(allow_growth=True)):
            onnx_graph = tfonnx.process_tf_graph(
                tf_graph,
                input_names=list(inputnames2tensornames.values()),
                output_names=list(outputnames2tensornames.values()),
                opset=onnx_opset,
            )
        if onnx_optimized:
            onnx_graph = optimizer.optimize_graph(onnx_graph)
        onnx_model = onnx_graph.make_model("triton export")

    # to match tensorflow savedmodel signature
    for io_type, names in (("inputs", inputnames2tensornames), ("outputs", outputnames2tensornames)):
        _replace_io_names(onnx_model.graph, io_type, names)

    onnx.checker.check_model(onnx_model)
    onnx.helper.strip_doc_string(onnx_model)
    return onnx.shape_inference.infer_shapes(onnx_model)
|
||||
|
||||
|
||||
class TFGraphDef2ONNXConverter(BaseConverter):
    """Converter from a TensorFlow GraphDef model to an ONNX model."""

    def __init__(self, *, onnx_opset: int, onnx_optimized: bool = True):
        self._onnx_optimized = onnx_optimized
        self._onnx_opset = onnx_opset

    def convert(self, model: Model, dataloader_fn) -> Model:
        """Return a copy of ``model`` whose handle is the converted ONNX model.

        ``dataloader_fn`` is accepted for interface compatibility and is not used.
        """
        assert isinstance(model.handle, tf.compat.v1.GraphDef)

        input_name_map = {}
        for name, spec in model.inputs.items():
            input_name_map[name] = spec.name
        output_name_map = {}
        for name, spec in model.outputs.items():
            output_name_map[name] = spec.name

        onnx_model = tfgraph2onnx(
            model.handle,
            input_name_map,
            output_name_map,
            onnx_opset=self._onnx_opset,
            onnx_optimized=self._onnx_optimized,
        )
        from .onnx import _infer_graph_precision

        inferred_precision = _infer_graph_precision(onnx_model.graph)
        assert inferred_precision == model.precision  # for testing precision inference function
        return model._replace(handle=onnx_model)
|
||||
|
||||
|
||||
# Every TensorFlow source format is converted to ONNX by the same converter;
# registry keys have the form "<source format>--<target format>".
for _source_format in (Format.TF_ESTIMATOR, Format.TF_KERAS, Format.TF_SAVEDMODEL):
    converters.register_extension(f"{_source_format.value}--{Format.ONNX.value}", TFGraphDef2ONNXConverter)
del _source_format  # keep the module namespace free of the loop helper
|
|
@ -0,0 +1,60 @@
|
|||
from typing import Iterable
|
||||
|
||||
from ..core import BaseConverter, Format, Model, Precision, ShapeSpec
|
||||
from ..extensions import converters
|
||||
from .onnx2trt_conv import onnx2trt
|
||||
from .tf2onnx_conv import tfgraph2onnx
|
||||
from .utils import get_input_shapes
|
||||
|
||||
|
||||
class TFGraphDef2TRTConverter(BaseConverter):
    """Converter from a TensorFlow GraphDef model to a TensorRT engine, going through ONNX."""

    def __init__(
        self,
        *,
        max_batch_size: int,
        max_workspace_size: int,
        onnx_opset: int,
        onnx_optimized: bool = True,
        precision: str,
    ):
        # Precision() validates the string against the known precision values.
        self._precision = Precision(precision)
        self._onnx_optimized = onnx_optimized
        self._onnx_opset = onnx_opset
        self._max_workspace_size = max_workspace_size
        self._max_batch_size = max_batch_size

    def convert(self, model: Model, dataloader_fn) -> Model:
        """Return a copy of ``model`` whose handle is the engine built by ``onnx2trt``.

        ``dataloader_fn`` is called once; its result is passed to
        ``get_input_shapes`` to derive the shapes given to ``onnx2trt``.
        """
        input_name_map = {}
        for name, spec in model.inputs.items():
            input_name_map[name] = spec.name
        output_name_map = {}
        for name, spec in model.outputs.items():
            output_name_map[name] = spec.name

        onnx_model = tfgraph2onnx(
            model.handle,
            input_name_map,
            output_name_map,
            onnx_opset=self._onnx_opset,
            onnx_optimized=self._onnx_optimized,
        )

        from .onnx import _infer_graph_precision

        inferred_precision = _infer_graph_precision(onnx_model.graph)
        assert inferred_precision == model.precision  # for testing precision inference function

        cuda_engine = onnx2trt(
            onnx_model,
            shapes=get_input_shapes(dataloader_fn(), self._max_batch_size),
            max_workspace_size=self._max_workspace_size,
            max_batch_size=self._max_batch_size,
            model_precision=self._precision.value,
        )
        return model._replace(handle=cuda_engine)

    @staticmethod
    def required_source_model_precision(requested_model_precision: Precision) -> Precision:
        """Return the precision the source model must have (always FP32)."""
        # TensorRT requires source models to be in FP32 precision
        return Precision.FP32
|
||||
|
||||
|
||||
# Every TensorFlow source format is converted to TensorRT by the same converter;
# registry keys have the form "<source format>--<target format>".
for _source_format in (Format.TF_ESTIMATOR, Format.TF_KERAS, Format.TF_SAVEDMODEL):
    converters.register_extension(f"{_source_format.value}--{Format.TRT.value}", TFGraphDef2TRTConverter)
del _source_format  # keep the module namespace free of the loop helper
|
|
@ -0,0 +1,107 @@
|
|||
from collections import Counter
|
||||
from typing import Callable, Dict, List
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from ..core import ShapeSpec
|
||||
|
||||
|
||||
def infer_precision(
    nx_graph: "nx.Graph",
    input_names: List[str],
    output_names: List[str],
    get_node_dtype_fn: Callable,
):
    """Return the most frequent non-integer, non-boolean node dtype of a graph.

    Args:
        nx_graph: graph whose nodes may carry a ``dtype`` attribute.
        input_names: not used by this implementation.
        output_names: not used by this implementation.
        get_node_dtype_fn: not used by this implementation.

    Returns:
        The most common value of the ``dtype`` node attribute after dropping
        dtypes whose ``kind`` is ``"i"`` or ``"b"``. Nodes without a dtype
        count as ``None``, so ``None`` can be the winner. ``None`` is also
        returned when no candidate is left (for example for an empty graph)
        instead of raising ``IndexError``.
    """
    node_dtypes = [nx_graph.nodes[node_name].get("dtype", None) for node_name in nx_graph.nodes]
    # None is kept on purpose: it has no ``kind`` attribute to inspect.
    node_dtypes = [dt for dt in node_dtypes if dt is None or dt.kind not in ["i", "b"]]
    ranked = Counter(node_dtypes).most_common(1)
    if not ranked:
        return None
    return ranked[0][0]
|
||||
|
||||
|
||||
def get_shapes_with_dynamic_axes(dataloader, batch_size_dim=0):
    """Collect input/output shapes, marking dimensions that vary between batches with -1.

    Each batch is unpacked as ``(_, x, y)`` where ``x`` and ``y`` map tensor
    names to objects exposing ``shape``. The shapes of the first batch are the
    reference; the dataloader is then iterated again (at most 100 batches) and
    every dimension that differs from the reference is set to -1.
    ``batch_size_dim`` is not used by this function.

    Returns:
        ``(input_shapes, output_shapes)``: dicts of name to list of dimensions.
    """

    def _mark_varying_dims(tensors, known_shapes):
        for tensor_name, tensor in tensors.items():
            recorded = known_shapes[tensor_name]
            for axis, size in enumerate(list(tensor.shape)):
                if recorded[axis] != -1 and recorded[axis] != size:
                    recorded[axis] = -1

    ## get all shapes from input and output tensors
    input_shapes = {}
    output_shapes = {}
    for first_batch in dataloader:
        _, first_x, first_y = first_batch
        input_shapes.update((name, list(tensor.shape)) for name, tensor in first_x.items())
        output_shapes.update((name, list(tensor.shape)) for name, tensor in first_y.items())
        break

    # based on max <max_num_iters> iterations, check which
    # dimensions differ to determine dynamic_axes
    max_num_iters = 100
    for batch_idx, batch in enumerate(dataloader):
        if batch_idx >= max_num_iters:
            break
        _, x, y = batch
        _mark_varying_dims(x, input_shapes)
        _mark_varying_dims(y, output_shapes)

    return input_shapes, output_shapes
|
||||
|
||||
|
||||
def get_dynamic_axes(dataloader, batch_size_dim=0):
    """Build a mapping of tensor name to ``{axis index: axis label}`` for dynamic axes.

    Axes reported as -1 by ``get_shapes_with_dynamic_axes`` are labelled
    ``"<tensor name>_<axis>"``. In addition every tensor gets its
    ``batch_size_dim`` axis labelled ``"batch_size_<batch_size_dim>"``; that
    label replaces any label already stored for the same axis.

    Args:
        dataloader: passed to ``get_shapes_with_dynamic_axes``.
        batch_size_dim: index of the batch dimension.

    Returns:
        Dict of tensor name (inputs and outputs merged) to axis-label dict.
    """
    input_shapes, output_shapes = get_shapes_with_dynamic_axes(dataloader, batch_size_dim)
    all_shapes = {**input_shapes, **output_shapes}
    dynamic_axes = {}

    for k, shape in all_shapes.items():
        for idx, s in enumerate(shape):
            if s == -1:
                # Accumulate per tensor: assigning a fresh dict here would drop
                # the axes already found for a tensor with several dynamic dims.
                dynamic_axes.setdefault(k, {})[idx] = k + "_" + str(idx)

    batch_axis_label = "batch_size_" + str(batch_size_dim)
    for k in all_shapes:
        dynamic_axes.setdefault(k, {})[batch_size_dim] = batch_axis_label

    return dynamic_axes
|
||||
|
||||
|
||||
def get_input_shapes(dataloader, max_batch_size=1) -> Dict[str, ShapeSpec]:
    """Derive a min/opt/max ``ShapeSpec`` for every input from the batches of ``dataloader``.

    Each batch is unpacked as ``(ids, x, y)``; only ``x`` (name to array-like
    with ``shape`` and ``ndim``) is used. The set of input names is taken from
    the first batch. For every input:

    * ``min`` is ``1`` followed by the per-dimension minimum over all batches,
    * ``max`` is ``max_batch_size`` followed by the per-dimension maximum,
    * ``opt`` is ``max_batch_size`` followed by the most frequently seen shape,

    where the first dimension of the observed shapes is always replaced.
    """
    counters = {}
    min_shapes: Dict[str, tuple] = {}
    max_shapes: Dict[str, tuple] = {}

    for batch_idx, (ids, x, y) in enumerate(dataloader):
        if batch_idx == 0:
            # Seed the accumulators from the inputs of the first batch.
            for name, tensor in x.items():
                counters[name] = Counter()
                min_shapes[name] = [float("inf")] * tensor.ndim
                max_shapes[name] = [float("-inf")] * tensor.ndim

        for name, tensor in x.items():
            observed = tensor.shape
            counters[name][observed] += 1
            min_shapes[name] = tuple(min(known, dim) for known, dim in zip(min_shapes[name], observed))
            max_shapes[name] = tuple(max(known, dim) for known, dim in zip(max_shapes[name], observed))

    opt_shapes: Dict[str, tuple] = {name: counter.most_common(1)[0][0] for name, counter in counters.items()}

    # same keys in min_shapes and max_shapes
    return {
        name: ShapeSpec(
            min=(1,) + min_shapes[name][1:],
            max=(max_batch_size,) + max_shapes[name][1:],
            opt=(max_batch_size,) + opt_shape[1:],
        )
        for name, opt_shape in opt_shapes.items()
    }
|
|
@ -0,0 +1,169 @@
|
|||
import abc
|
||||
import importlib
|
||||
import logging
|
||||
import os
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
LOGGER = logging.getLogger(__name__)

# Names of the functions that are looked up in user-provided source files.
DATALOADER_FN_NAME = "get_dataloader_fn"
GET_MODEL_FN_NAME = "get_model"
GET_SERVING_INPUT_RECEIVER_FN = "get_serving_input_receiver_fn"
GET_ARGPARSER_FN_NAME = "update_argparser"
|
||||
|
||||
|
||||
class TensorSpec(NamedTuple):
    """Immutable description of a single tensor: its name, dtype name and shape."""

    # Tensor name.
    name: str
    # Name of the data type, stored as a string.
    dtype: str
    # Tensor dimensions.
    shape: Tuple
|
||||
|
||||
|
||||
class Parameter(Enum):
    """Base Enum whose members are ordered by comparing their ``value``."""

    def __lt__(self, other: "Parameter") -> bool:
        own_value, other_value = self.value, other.value
        return own_value < other_value
|
||||
|
||||
|
||||
class Accelerator(Parameter):
    """Closed set of accelerator identifiers, each stored as a lowercase string."""

    AMP = "amp"
    CUDA = "cuda"
    TRT = "trt"
|
||||
|
||||
|
||||
class Precision(Parameter):
    """Closed set of model precision identifiers, each stored as a lowercase string."""

    FP16 = "fp16"
    FP32 = "fp32"
    TF32 = "tf32"  # Deprecated
|
||||
|
||||
|
||||
class Format(Parameter):
    """Model format identifiers; each member's value is its string form."""

    # TensorFlow-prefixed formats
    TF_GRAPHDEF = "tf-graphdef"
    TF_SAVEDMODEL = "tf-savedmodel"
    TF_TRT = "tf-trt"
    TF_ESTIMATOR = "tf-estimator"
    TF_KERAS = "tf-keras"

    ONNX = "onnx"
    TRT = "trt"

    # "ts-" / "pyt" formats
    TS_SCRIPT = "ts-script"
    TS_TRACE = "ts-trace"
    PYT = "pyt"
|
||||
|
||||
|
||||
class Model(NamedTuple):
    """Bundle of a model handle with its precision and input/output tensor specs."""

    handle: object  # the model object itself; opaque at this level
    precision: Optional[Precision]  # may be None
    inputs: Dict[str, TensorSpec]  # presumably keyed by tensor name - confirm with producers
    outputs: Dict[str, TensorSpec]  # presumably keyed by tensor name - confirm with producers
|
||||
|
||||
|
||||
def load_from_file(file_path, label, target):
    """Execute a Python source file as a module and fetch one attribute from it.

    Args:
        file_path: location of the Python file to execute.
        label: name given to the module created from the file.
        target: name of the attribute to read from the executed module.

    Returns:
        The attribute named ``target``, or ``None`` when the module does not define it.
    """
    # ``import importlib`` alone does not guarantee that the ``util`` submodule is loaded,
    # so import it explicitly before using it.
    import importlib.util

    spec = importlib.util.spec_from_file_location(name=label, location=file_path)
    my_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(my_module)  # pytype: disable=attribute-error
    return getattr(my_module, target, None)
|
||||
|
||||
|
||||
class BaseLoader(abc.ABC):
    """Abstract interface of model loaders."""

    # None by default; concrete loaders may override it.
    required_fn_name_for_signature_parsing: Optional[str] = None

    @abc.abstractmethod
    def load(self, model_path: Union[str, Path], **kwargs) -> Model:
        """
        Load and process a model from file, based on the given set of args.
        """
|
||||
|
||||
|
||||
class BaseSaver(abc.ABC):
    """Abstract interface of model savers."""

    # None by default; concrete savers may override it.
    required_fn_name_for_signature_parsing: Optional[str] = None

    @abc.abstractmethod
    def save(self, model: Model, model_path: Union[str, Path]) -> None:
        """
        Save the model to a file.
        """
|
||||
|
||||
|
||||
class BaseRunner(abc.ABC):
    """Abstract interface of runners; concrete runners implement ``init_inference``."""

    # None by default; concrete runners may override it.
    required_fn_name_for_signature_parsing: Optional[str] = None

    @abc.abstractmethod
    def init_inference(self, model: Model):
        """Prepare inference for ``model``; must be implemented by subclasses."""
        raise NotImplementedError()
|
||||
|
||||
|
||||
class BaseRunnerSession(abc.ABC):
    """Abstract session bound to a model.

    Subclasses implement the context-manager protocol (``__enter__`` / ``__exit__``)
    and ``__call__``. Two helpers for temporarily changing environment variables are
    provided for them.
    """

    def __init__(self, model: "Model"):
        """Remember the model this session works with."""
        self._model = model

    @abc.abstractmethod
    def __enter__(self):
        raise NotImplementedError()

    @abc.abstractmethod
    def __exit__(self, exc_type, exc_value, traceback):
        raise NotImplementedError()

    @abc.abstractmethod
    def __call__(self, x: Dict[str, object]):
        raise NotImplementedError()

    def _set_env_variables(self) -> Dict[str, object]:
        """Set the session's environment variables and return the values they replaced.

        The set of variables to apply is currently empty, so nothing is changed and an
        empty dict is returned. The method can only set variables, not unset them;
        extend it if removal is needed.

        Returns:
            Mapping of variable name to its previous value (``None`` when it was unset).
        """
        to_set = {}
        old_values = {k: os.environ.pop(k, None) for k in to_set}
        os.environ.update(to_set)
        return old_values

    def _recover_env_variables(self, old_envs: Dict[str, object]):
        """Restore environment variables from a mapping returned by ``_set_env_variables``.

        Args:
            old_envs: variable name -> value to restore; ``None`` means the variable
                should not exist afterwards.
        """
        for name, value in old_envs.items():
            if value is None:
                # pop() rather than del: the variable may already have been removed,
                # and restoring state must not fail because of that.
                os.environ.pop(name, None)
            else:
                os.environ[name] = str(value)
|
||||
|
||||
|
||||
class BaseConverter(abc.ABC):
    """Abstract interface of model converters."""

    # None by default; concrete converters may override it.
    required_fn_name_for_signature_parsing: Optional[str] = None

    @abc.abstractmethod
    def convert(self, model: Model, dataloader_fn) -> Model:
        """Convert ``model`` and return the converted model; implemented by subclasses."""
        raise NotImplementedError()

    @staticmethod
    def required_source_model_precision(requested_model_precision: Precision) -> Precision:
        """Return the precision required of the source model.

        The base implementation returns the requested precision unchanged.
        """
        source_precision = requested_model_precision
        return source_precision
|
||||
|
||||
|
||||
class BaseMetricsCalculator(abc.ABC):
    """Abstract interface of metrics calculators."""

    # None by default; concrete calculators may override it.
    required_fn_name_for_signature_parsing: Optional[str] = None

    @abc.abstractmethod
    def calc(
        self,
        *,
        ids: List[Any],
        y_pred: Dict[str, np.ndarray],
        x: Optional[Dict[str, np.ndarray]],
        y_real: Optional[Dict[str, np.ndarray]],
    ) -> Dict[str, float]:
        """
        Compute error/accuracy metrics.

        All arguments are keyword-only.

        Args:
            ids: ids identifying each sample in the batch
            y_pred: model outputs; maps output name to output value
            x: model inputs; maps input name to input value
            y_real: ground truth; maps output name to output value

        Returns:
            dict mapping metric name to metric value
        """
|
||||
|
||||
|
||||
class ShapeSpec(NamedTuple):
    """Triple of shapes: minimum, optimal and maximum."""

    min: Tuple  # minimum shape
    opt: Tuple  # optimal shape
    max: Tuple  # maximum shape
|
|
@ -0,0 +1,133 @@
|
|||
from pathlib import Path
|
||||
from typing import Dict, Iterable
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Conversion factors between bytes and megabytes (2 ** 20 bytes).
MB2B = 1 << 20
B2MB = 1 / MB2B
# Cache size in bytes (256 MB) used as the flush threshold.
FLUSH_THRESHOLD_B = MB2B * 256
|
||||
|
||||
|
||||
def pad_except_batch_axis(data: np.ndarray, target_shape_with_batch_axis: Iterable[int]):
    """Pad ``data`` with ``np.nan`` at the end of every axis except axis 0.

    Args:
        data: array to pad.
        target_shape_with_batch_axis: requested shape, including the batch axis
            (index 0). The batch-axis entry is checked but never used for padding.
            Any iterable of ints is accepted.

    Returns:
        Array whose axes 1.. have the requested sizes; added cells hold ``np.nan``.

    Raises:
        AssertionError: if a requested size is smaller than the current one.

    NOTE(review): the fill value is ``np.nan``, so this looks intended for floating
    point data; behaviour for integer arrays depends on numpy's casting - confirm.
    """
    # The argument is annotated as Iterable but is walked twice and sliced below,
    # so materialize it once (also makes one-shot iterators work).
    target_shape = tuple(target_shape_with_batch_axis)
    assert all(
        current_size <= target_size for target_size, current_size in zip(target_shape, data.shape)
    ), "target_shape should have equal or greater all dimensions comparing to data.shape"
    padding = [(0, 0)] + [  # (0, 0) - do not pad on batch_axis (with index 0)
        (0, target_size - current_size)
        for target_size, current_size in zip(target_shape[1:], data.shape[1:])
    ]
    return np.pad(data, padding, "constant", constant_values=np.nan)
|
||||
|
||||
|
||||
class NpzWriter:
    """
    Dumps dicts of numpy arrays into npz files

    It can/shall be used as context manager:
    ```
    with NpzWriter('mydir') as writer:
        writer.write(outputs={'classes': np.zeros(8), 'probs': np.zeros((8, 4))},
                     labels={'classes': np.zeros(8)},
                     inputs={'input': np.zeros((8, 240, 240, 3))})
    ```

    ## Variable size data

    Arrays cached under the same name are padded with np.nan (on every axis except
    the batch axis) to a common shape before being concatenated.
    Also each generated file may have different size of dynamic axis.
    """

    def __init__(self, output_dir, compress=False):
        """
        Args:
            output_dir: directory the npz files are written to
            compress: when true, files are written with np.savez_compressed
        """
        self._output_dir = Path(output_dir)
        # prefix -> {array name -> cached array}
        self._items_cache: Dict[str, Dict[str, np.ndarray]] = {}
        # prefix -> number of items already dumped; used as file index
        self._items_counters: Dict[str, int] = {}
        self._flush_threshold_b = FLUSH_THRESHOLD_B
        self._compress = compress

    @property
    def cache_size(self):
        """Number of cached bytes per prefix."""
        return {name: sum(a.nbytes for a in data.values()) for name, data in self._items_cache.items()}

    def _append_to_cache(self, prefix, data):
        """Append arrays from ``data`` to the cache kept for ``prefix``.

        ``None`` is ignored; anything else that is not a dict raises ValueError.
        """
        if data is None:
            return

        if not isinstance(data, dict):
            raise ValueError(f"{prefix} data to store shall be dict")

        cached_data = self._items_cache.get(prefix, {})
        for name, value in data.items():
            assert isinstance(
                value, (list, np.ndarray)
            ), f"Values shall be lists or np.ndarrays; current type {type(value)}"
            if not isinstance(value, np.ndarray):
                value = np.array(value)

            # np.nan is reserved as the padding marker, so it must not appear in the data
            assert value.dtype.kind in ["S", "U"] or not np.any(
                np.isnan(value)
            ), f"Values with np.nan is not supported; {name}={value}"
            cached_value = cached_data.get(name, None)
            if cached_value is not None:
                # bring both arrays to a common shape before joining them on the batch axis
                target_shape = np.max([cached_value.shape, value.shape], axis=0)
                cached_value = pad_except_batch_axis(cached_value, target_shape)
                value = pad_except_batch_axis(value, target_shape)
                value = np.concatenate((cached_value, value))
            cached_data[name] = value
        self._items_cache[prefix] = cached_data

    def write(self, **kwargs):
        """
        Writes named list of dictionaries of np.ndarrays.
        Finally keyword names will be later prefixes of npz files where those dictionaries will be stored.

        ex. writer.write(inputs={'input': np.zeros((2, 10))},
                         outputs={'classes': np.zeros((2,)), 'probabilities': np.zeros((2, 32))},
                         labels={'classes': np.zeros((2,))})
        Args:
            **kwargs: named list of dictionaries of np.ndarrays to store
        """

        for prefix, data in kwargs.items():
            self._append_to_cache(prefix, data)

        # default=0: the cache may still be empty (no kwargs, or only None values)
        biggest_item_size = max(self.cache_size.values(), default=0)
        if biggest_item_size > self._flush_threshold_b:
            self.flush()

    def flush(self):
        """Dump everything cached so far to files and empty the cache."""
        for prefix, data in self._items_cache.items():
            self._dump(prefix, data)
        self._items_cache = {}

    def _dump(self, prefix, data):
        """Write the arrays cached for ``prefix`` into a single npz file.

        The file name contains the number of items dumped earlier for this prefix.
        All arrays must have the same size on axis 0; this is checked before anything
        is written so that no inconsistent file is left behind.
        """
        if not data:
            # nothing cached under this prefix - no file to write
            return

        nitems = len(next(iter(data.values())))

        msg_for_labels = (
            "If these are correct shapes - consider moving loading of them into metrics.py."
            if prefix == "labels"
            else ""
        )
        shapes = {name: value.shape if isinstance(value, np.ndarray) else (len(value),) for name, value in data.items()}

        assert all(len(v) == nitems for v in data.values()), (
            f'All items in "{prefix}" shall have same size on 0 axis equal to batch size. {msg_for_labels}'
            f'{", ".join(f"{name}: {shape}" for name, shape in shapes.items())}'
        )

        idx = self._items_counters.setdefault(prefix, 0)
        filename = f"{prefix}-{idx:012d}.npz"
        output_path = self._output_dir / filename
        if self._compress:
            np.savez_compressed(output_path, **data)
        else:
            np.savez(output_path, **data)

        self._items_counters[prefix] += nitems

    def __enter__(self):
        """Create the output directory; refuse to reuse a non-empty one."""
        if self._output_dir.exists() and any(self._output_dir.iterdir()):
            raise ValueError(f"{self._output_dir.as_posix()} is not empty")
        self._output_dir.mkdir(parents=True, exist_ok=True)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Flush the remaining cache when the context is left."""
        self.flush()
|
|
@ -0,0 +1,69 @@
|
|||
import importlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExtensionManager:
    """Named registry mapping extension identifiers to classes."""

    def __init__(self, name: str):
        """
        Args:
            name: registry name; only used in log and error messages.
        """
        self._name = name
        self._registry = {}

    def register_extension(self, extension: str, clazz):
        """Register ``clazz`` under ``extension``.

        Registering again from the same module as the existing entry is silently
        ignored (the existing entry is kept).

        Raises:
            RuntimeError: if ``extension`` is already registered by a class from a
                different module.
        """
        already_registered_class = self._registry.get(extension, None)
        if already_registered_class and already_registered_class.__module__ != clazz.__module__:
            # NB: must be ``__name__``; a bare ``__name`` is name-mangled inside a class
            # body and would raise AttributeError instead of this error.
            raise RuntimeError(
                f"Conflicting extension {self._name}/{extension}; "
                f"{already_registered_class.__module__}.{already_registered_class.__name__} "
                f"and "
                f"{clazz.__module__}.{clazz.__name__}"
            )
        elif already_registered_class is None:
            clazz_full_name = f"{clazz.__module__}.{clazz.__name__}" if clazz is not None else "None"
            LOGGER.debug(f"Registering extension {self._name}/{extension}: {clazz_full_name}")
            self._registry[extension] = clazz

    def get(self, extension):
        """Return the class registered under ``extension``.

        Raises:
            RuntimeError: if nothing is registered under ``extension``.
        """
        if extension not in self._registry:
            raise RuntimeError(f"Missing extension {self._name}/{extension}")
        return self._registry[extension]

    @property
    def supported_extensions(self):
        """List of registered extension identifiers."""
        return list(self._registry)

    @staticmethod
    def scan_for_extensions(extension_dirs: List[Path]):
        """Execute every Python file under ``extension_dirs`` that mentions ``.register_extension(``.

        Module names are derived from the file path relative to the parent of the
        module-level ``toolkit_root_dir``, which therefore has to be defined before
        this is called. Files whose execution fails with ModuleNotFoundError are
        logged and skipped.
        """
        # ``import importlib`` alone does not guarantee that the ``util`` submodule is loaded.
        import importlib.util

        register_pattern = r".*\.register_extension\(.*"

        for extension_dir in extension_dirs:
            for python_path in extension_dir.rglob("*.py"):
                if not python_path.is_file():
                    continue
                payload = python_path.read_text()
                # only existence of a match matters, so search() is enough
                if re.search(register_pattern, payload):
                    import_path = python_path.relative_to(toolkit_root_dir.parent)
                    # as_posix() always uses "/", independently of os.sep
                    package = import_path.parent.as_posix().replace("/", ".")
                    package_with_module = f"{package}.{import_path.stem}"
                    spec = importlib.util.spec_from_file_location(name=package_with_module, location=python_path)
                    my_module = importlib.util.module_from_spec(spec)
                    my_module.__package__ = package

                    try:
                        spec.loader.exec_module(my_module)  # pytype: disable=attribute-error
                    except ModuleNotFoundError as e:
                        LOGGER.error(
                            f"Could not load extensions from {import_path} due to missing python packages; {e}"
                        )
|
||||
|
||||
|
||||
# One registry per kind of extension.
runners = ExtensionManager("runners")
loaders = ExtensionManager("loaders")
savers = ExtensionManager("savers")
converters = ExtensionManager("converters")

# Parent of the directory holding this file; scan_for_extensions reads it as a module global,
# so it has to be assigned before the scan below.
toolkit_root_dir = (Path(__file__).parent / "..").resolve()
# Import-time side effect: executes the modules under the toolkit root that mention register_extension(.
ExtensionManager.scan_for_extensions([toolkit_root_dir])
|
|
@ -0,0 +1,47 @@
|
|||
import csv
|
||||
import re
|
||||
from typing import Dict, List
|
||||
|
||||
from natsort import natsorted
|
||||
from tabulate import tabulate
|
||||
|
||||
|
||||
def sort_results(results: List):
    """Return ``results`` in natural sort order.

    Each item is keyed by the list of its values, taken in the item's own key order.
    """
    return natsorted(results, key=lambda entry: [entry[field] for field in entry.keys()])
|
||||
|
||||
|
||||
def save_results(filename: str, data: List, formatted: bool = False):
    """Append ``data`` to a CSV file.

    Args:
        filename: path of the CSV file; created when missing, appended to otherwise.
        data: list of dicts; the keys of the first item define the columns.
        formatted: when true, keys are passed through ``format_data`` first.

    Nothing is written (and no file is created) when ``data`` is empty. The header
    row is written only when the file is empty, so repeated calls produce a single
    well-formed CSV instead of one with header rows in the middle.
    """
    data = format_data(data=data) if formatted else data
    if not data:
        return

    # newline="" is what the csv module requires of files it writes to
    with open(filename, "a", newline="") as csvfile:
        fieldnames = data[0].keys()
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # a file opened for appending is positioned at its end: 0 means it is empty
        if csvfile.tell() == 0:
            writer.writeheader()
        writer.writerows(data)
|
||||
|
||||
|
||||
def format_data(data: List[Dict]) -> List[Dict]:
    """Return a new list in which every item has been passed through ``format_keys``."""
    return [format_keys(data=entry) for entry in data]
|
||||
|
||||
|
||||
def format_keys(data: Dict) -> Dict:
    """Return a copy of ``data`` whose keys were rewritten with ``format_key``; values are kept."""
    renamed = {}
    for name, value in data.items():
        renamed[format_key(key=name)] = value
    return renamed
|
||||
|
||||
|
||||
def format_key(key: str) -> str:
    """Split ``key`` on underscores and spaces, capitalize each part and join with spaces."""
    words = re.split("_| ", key)
    return " ".join(word.capitalize() for word in words)
|
||||
|
||||
|
||||
def show_results(results: List[Dict]):
    """Print ``results`` as a table; column headers are the keys of the first item."""
    column_names = list(results[0].keys())
    table_rows = ([cell for _, cell in row.items()] for row in results)
    print(tabulate(table_rows, headers=column_names))
|
|
@ -0,0 +1,47 @@
|
|||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
def warmup(
    model_name: str,
    batch_sizes: List[int],
    triton_instances: int = 1,
    profiling_data: str = "random",
    input_shapes: Optional[List[str]] = None,
    server_url: str = "localhost",
    measurement_window: int = 10000,
):
    """Run ``perf_client`` against a model for the smallest and the largest batch size.

    Args:
        model_name: value passed to perf_client with ``-m``.
        batch_sizes: batch sizes of interest; only their min and max are used.
        triton_instances: value used for the ``-c``, ``-t`` and max-threads options.
        profiling_data: value passed with ``--input-data``.
        input_shapes: optional shapes, each passed with its own ``--shape``.
        server_url: server host; port 8000 is appended.
        measurement_window: base window; six times this value is passed with ``-p``.

    Raises:
        ValueError: if ``batch_sizes`` is empty.
        SystemExit: with code 1 when perf_client reports a failure.
    """
    print("\n")
    print("==== Warmup start ====")
    print("\n")

    input_shapes = " ".join(map(lambda shape: f" --shape {shape}", input_shapes)) if input_shapes else ""

    # a set, so a single batch size is run only once
    bs = {min(batch_sizes), max(batch_sizes)}

    measurement_window = 6 * measurement_window

    for batch_size in bs:
        # NOTE(review): "-max-threads" has a single dash here; confirm that perf_client
        # parses it as intended.
        exec_args = f"""-max-threads {triton_instances} \
            -m {model_name} \
            -x 1 \
            -c {triton_instances} \
            -t {triton_instances} \
            -p {measurement_window} \
            -v \
            -i http \
            -u {server_url}:8000 \
            -b {batch_size} \
            --input-data {profiling_data} {input_shapes}
        """

        result = os.system(f"perf_client {exec_args}")
        if result != 0:
            print(f"Failed running performance tests. Perf client failed with exit code {result}")
            # the exit() helper comes from the site module and is meant for interactive
            # use only; raise SystemExit directly instead
            raise SystemExit(1)

    print("\n")
    print("==== Warmup done ====")
    print("\n")
|
18
TensorFlow/Classification/ConvNets/triton/metrics.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from deployment_toolkit.core import BaseMetricsCalculator
|
||||
|
||||
|
||||
class MetricsCalculator(BaseMetricsCalculator):
    """Computes accuracy from a single named model output."""

    def __init__(self, output_used_for_metrics: str = "classes"):
        """
        Args:
            output_used_for_metrics: key read from both predictions and ground truth
        """
        self._output_used_for_metrics = output_used_for_metrics

    def calc(self, *, y_pred: Dict[str, np.ndarray], y_real: Optional[Dict[str, np.ndarray]], **_) -> Dict[str, float]:
        """Return ``{"accuracy": ...}``: the fraction of elements where prediction equals ground truth.

        Both arrays are squeezed first and must then have identical shapes.
        Other keyword arguments are accepted and ignored.
        """
        output_name = self._output_used_for_metrics
        expected = np.squeeze(y_real[output_name])
        predicted = np.squeeze(y_pred[output_name])
        assert expected.shape == predicted.shape
        matches = expected == predicted
        return {"accuracy": matches.mean()}
|
|
@ -0,0 +1,992 @@
|
|||
<?xml version="1.0" encoding="utf-8" standalone="no"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<!-- Created with matplotlib (https://matplotlib.org/) -->
|
||||
<svg height="331.389812pt" version="1.1" viewBox="0 0 424.62875 331.389812" width="424.62875pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<metadata>
|
||||
<rdf:RDF xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
||||
<cc:Work>
|
||||
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
|
||||
<dc:date>2021-04-15T15:15:19.288796</dc:date>
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:creator>
|
||||
<cc:Agent>
|
||||
<dc:title>Matplotlib v3.3.4, https://matplotlib.org/</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:creator>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<defs>
|
||||
<style type="text/css">*{stroke-linecap:butt;stroke-linejoin:round;}</style>
|
||||
</defs>
|
||||
<g id="figure_1">
|
||||
<g id="patch_1">
|
||||
<path d="M 0 331.389812
|
||||
L 424.62875 331.389812
|
||||
L 424.62875 0
|
||||
L 0 0
|
||||
z
|
||||
" style="fill:#ffffff;"/>
|
||||
</g>
|
||||
<g id="axes_1">
|
||||
<g id="patch_2">
|
||||
<path d="M 60.30875 288.430125
|
||||
L 417.42875 288.430125
|
||||
L 417.42875 22.318125
|
||||
L 60.30875 22.318125
|
||||
z
|
||||
" style="fill:#ffffff;"/>
|
||||
</g>
|
||||
<g id="matplotlib.axis_1">
|
||||
<g id="xtick_1">
|
||||
<g id="line2d_1">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 76.541477 288.430125
|
||||
L 76.541477 22.318125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_1">
|
||||
<!-- 1 -->
|
||||
<g style="fill:#262626;" transform="translate(73.042102 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 12.40625 8.296875
|
||||
L 28.515625 8.296875
|
||||
L 28.515625 63.921875
|
||||
L 10.984375 60.40625
|
||||
L 10.984375 69.390625
|
||||
L 28.421875 72.90625
|
||||
L 38.28125 72.90625
|
||||
L 38.28125 8.296875
|
||||
L 54.390625 8.296875
|
||||
L 54.390625 0
|
||||
L 12.40625 0
|
||||
z
|
||||
" id="DejaVuSans-49"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-49"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_2">
|
||||
<g id="line2d_2">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 122.920698 288.430125
|
||||
L 122.920698 22.318125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_2">
|
||||
<!-- 2 -->
|
||||
<g style="fill:#262626;" transform="translate(119.421323 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 19.1875 8.296875
|
||||
L 53.609375 8.296875
|
||||
L 53.609375 0
|
||||
L 7.328125 0
|
||||
L 7.328125 8.296875
|
||||
Q 12.9375 14.109375 22.625 23.890625
|
||||
Q 32.328125 33.6875 34.8125 36.53125
|
||||
Q 39.546875 41.84375 41.421875 45.53125
|
||||
Q 43.3125 49.21875 43.3125 52.78125
|
||||
Q 43.3125 58.59375 39.234375 62.25
|
||||
Q 35.15625 65.921875 28.609375 65.921875
|
||||
Q 23.96875 65.921875 18.8125 64.3125
|
||||
Q 13.671875 62.703125 7.8125 59.421875
|
||||
L 7.8125 69.390625
|
||||
Q 13.765625 71.78125 18.9375 73
|
||||
Q 24.125 74.21875 28.421875 74.21875
|
||||
Q 39.75 74.21875 46.484375 68.546875
|
||||
Q 53.21875 62.890625 53.21875 53.421875
|
||||
Q 53.21875 48.921875 51.53125 44.890625
|
||||
Q 49.859375 40.875 45.40625 35.40625
|
||||
Q 44.1875 33.984375 37.640625 27.21875
|
||||
Q 31.109375 20.453125 19.1875 8.296875
|
||||
z
|
||||
" id="DejaVuSans-50"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-50"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_3">
|
||||
<g id="line2d_3">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 169.299919 288.430125
|
||||
L 169.299919 22.318125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_3">
|
||||
<!-- 4 -->
|
||||
<g style="fill:#262626;" transform="translate(165.800544 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 37.796875 64.3125
|
||||
L 12.890625 25.390625
|
||||
L 37.796875 25.390625
|
||||
z
|
||||
M 35.203125 72.90625
|
||||
L 47.609375 72.90625
|
||||
L 47.609375 25.390625
|
||||
L 58.015625 25.390625
|
||||
L 58.015625 17.1875
|
||||
L 47.609375 17.1875
|
||||
L 47.609375 0
|
||||
L 37.796875 0
|
||||
L 37.796875 17.1875
|
||||
L 4.890625 17.1875
|
||||
L 4.890625 26.703125
|
||||
z
|
||||
" id="DejaVuSans-52"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-52"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_4">
|
||||
<g id="line2d_4">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 215.67914 288.430125
|
||||
L 215.67914 22.318125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_4">
|
||||
<!-- 8 -->
|
||||
<g style="fill:#262626;" transform="translate(212.179765 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 31.78125 34.625
|
||||
Q 24.75 34.625 20.71875 30.859375
|
||||
Q 16.703125 27.09375 16.703125 20.515625
|
||||
Q 16.703125 13.921875 20.71875 10.15625
|
||||
Q 24.75 6.390625 31.78125 6.390625
|
||||
Q 38.8125 6.390625 42.859375 10.171875
|
||||
Q 46.921875 13.96875 46.921875 20.515625
|
||||
Q 46.921875 27.09375 42.890625 30.859375
|
||||
Q 38.875 34.625 31.78125 34.625
|
||||
z
|
||||
M 21.921875 38.8125
|
||||
Q 15.578125 40.375 12.03125 44.71875
|
||||
Q 8.5 49.078125 8.5 55.328125
|
||||
Q 8.5 64.0625 14.71875 69.140625
|
||||
Q 20.953125 74.21875 31.78125 74.21875
|
||||
Q 42.671875 74.21875 48.875 69.140625
|
||||
Q 55.078125 64.0625 55.078125 55.328125
|
||||
Q 55.078125 49.078125 51.53125 44.71875
|
||||
Q 48 40.375 41.703125 38.8125
|
||||
Q 48.828125 37.15625 52.796875 32.3125
|
||||
Q 56.78125 27.484375 56.78125 20.515625
|
||||
Q 56.78125 9.90625 50.3125 4.234375
|
||||
Q 43.84375 -1.421875 31.78125 -1.421875
|
||||
Q 19.734375 -1.421875 13.25 4.234375
|
||||
Q 6.78125 9.90625 6.78125 20.515625
|
||||
Q 6.78125 27.484375 10.78125 32.3125
|
||||
Q 14.796875 37.15625 21.921875 38.8125
|
||||
z
|
||||
M 18.3125 54.390625
|
||||
Q 18.3125 48.734375 21.84375 45.5625
|
||||
Q 25.390625 42.390625 31.78125 42.390625
|
||||
Q 38.140625 42.390625 41.71875 45.5625
|
||||
Q 45.3125 48.734375 45.3125 54.390625
|
||||
Q 45.3125 60.0625 41.71875 63.234375
|
||||
Q 38.140625 66.40625 31.78125 66.40625
|
||||
Q 25.390625 66.40625 21.84375 63.234375
|
||||
Q 18.3125 60.0625 18.3125 54.390625
|
||||
z
|
||||
" id="DejaVuSans-56"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-56"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_5">
|
||||
<g id="line2d_5">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 262.05836 288.430125
|
||||
L 262.05836 22.318125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_5">
|
||||
<!-- 16 -->
|
||||
<g style="fill:#262626;" transform="translate(255.05961 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 33.015625 40.375
|
||||
Q 26.375 40.375 22.484375 35.828125
|
||||
Q 18.609375 31.296875 18.609375 23.390625
|
||||
Q 18.609375 15.53125 22.484375 10.953125
|
||||
Q 26.375 6.390625 33.015625 6.390625
|
||||
Q 39.65625 6.390625 43.53125 10.953125
|
||||
Q 47.40625 15.53125 47.40625 23.390625
|
||||
Q 47.40625 31.296875 43.53125 35.828125
|
||||
Q 39.65625 40.375 33.015625 40.375
|
||||
z
|
||||
M 52.59375 71.296875
|
||||
L 52.59375 62.3125
|
||||
Q 48.875 64.0625 45.09375 64.984375
|
||||
Q 41.3125 65.921875 37.59375 65.921875
|
||||
Q 27.828125 65.921875 22.671875 59.328125
|
||||
Q 17.53125 52.734375 16.796875 39.40625
|
||||
Q 19.671875 43.65625 24.015625 45.921875
|
||||
Q 28.375 48.1875 33.59375 48.1875
|
||||
Q 44.578125 48.1875 50.953125 41.515625
|
||||
Q 57.328125 34.859375 57.328125 23.390625
|
||||
Q 57.328125 12.15625 50.6875 5.359375
|
||||
Q 44.046875 -1.421875 33.015625 -1.421875
|
||||
Q 20.359375 -1.421875 13.671875 8.265625
|
||||
Q 6.984375 17.96875 6.984375 36.375
|
||||
Q 6.984375 53.65625 15.1875 63.9375
|
||||
Q 23.390625 74.21875 37.203125 74.21875
|
||||
Q 40.921875 74.21875 44.703125 73.484375
|
||||
Q 48.484375 72.75 52.59375 71.296875
|
||||
z
|
||||
" id="DejaVuSans-54"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-49"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-54"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_6">
|
||||
<g id="line2d_6">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 308.437581 288.430125
|
||||
L 308.437581 22.318125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_6">
|
||||
<!-- 32 -->
|
||||
<g style="fill:#262626;" transform="translate(301.438831 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 40.578125 39.3125
|
||||
Q 47.65625 37.796875 51.625 33
|
||||
Q 55.609375 28.21875 55.609375 21.1875
|
||||
Q 55.609375 10.40625 48.1875 4.484375
|
||||
Q 40.765625 -1.421875 27.09375 -1.421875
|
||||
Q 22.515625 -1.421875 17.65625 -0.515625
|
||||
Q 12.796875 0.390625 7.625 2.203125
|
||||
L 7.625 11.71875
|
||||
Q 11.71875 9.328125 16.59375 8.109375
|
||||
Q 21.484375 6.890625 26.8125 6.890625
|
||||
Q 36.078125 6.890625 40.9375 10.546875
|
||||
Q 45.796875 14.203125 45.796875 21.1875
|
||||
Q 45.796875 27.640625 41.28125 31.265625
|
||||
Q 36.765625 34.90625 28.71875 34.90625
|
||||
L 20.21875 34.90625
|
||||
L 20.21875 43.015625
|
||||
L 29.109375 43.015625
|
||||
Q 36.375 43.015625 40.234375 45.921875
|
||||
Q 44.09375 48.828125 44.09375 54.296875
|
||||
Q 44.09375 59.90625 40.109375 62.90625
|
||||
Q 36.140625 65.921875 28.71875 65.921875
|
||||
Q 24.65625 65.921875 20.015625 65.03125
|
||||
Q 15.375 64.15625 9.8125 62.3125
|
||||
L 9.8125 71.09375
|
||||
Q 15.4375 72.65625 20.34375 73.4375
|
||||
Q 25.25 74.21875 29.59375 74.21875
|
||||
Q 40.828125 74.21875 47.359375 69.109375
|
||||
Q 53.90625 64.015625 53.90625 55.328125
|
||||
Q 53.90625 49.265625 50.4375 45.09375
|
||||
Q 46.96875 40.921875 40.578125 39.3125
|
||||
z
|
||||
" id="DejaVuSans-51"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-51"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-50"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_7">
|
||||
<g id="line2d_7">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 354.816802 288.430125
|
||||
L 354.816802 22.318125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_7">
|
||||
<!-- 64 -->
|
||||
<g style="fill:#262626;" transform="translate(347.818052 306.288406)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-54"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-52"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_8">
|
||||
<g id="line2d_8">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 401.196023 288.430125
|
||||
L 401.196023 22.318125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_8">
|
||||
<!-- 128 -->
|
||||
<g style="fill:#262626;" transform="translate(390.697898 306.288406)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-49"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-50"/>
|
||||
<use x="127.246094" xlink:href="#DejaVuSans-56"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="text_9">
|
||||
<!-- Client Batch Size -->
|
||||
<g style="fill:#262626;" transform="translate(188.120938 321.694187)scale(0.12 -0.12)">
|
||||
<defs>
|
||||
<path d="M 64.40625 67.28125
|
||||
L 64.40625 56.890625
|
||||
Q 59.421875 61.53125 53.78125 63.8125
|
||||
Q 48.140625 66.109375 41.796875 66.109375
|
||||
Q 29.296875 66.109375 22.65625 58.46875
|
||||
Q 16.015625 50.828125 16.015625 36.375
|
||||
Q 16.015625 21.96875 22.65625 14.328125
|
||||
Q 29.296875 6.6875 41.796875 6.6875
|
||||
Q 48.140625 6.6875 53.78125 8.984375
|
||||
Q 59.421875 11.28125 64.40625 15.921875
|
||||
L 64.40625 5.609375
|
||||
Q 59.234375 2.09375 53.4375 0.328125
|
||||
Q 47.65625 -1.421875 41.21875 -1.421875
|
||||
Q 24.65625 -1.421875 15.125 8.703125
|
||||
Q 5.609375 18.84375 5.609375 36.375
|
||||
Q 5.609375 53.953125 15.125 64.078125
|
||||
Q 24.65625 74.21875 41.21875 74.21875
|
||||
Q 47.75 74.21875 53.53125 72.484375
|
||||
Q 59.328125 70.75 64.40625 67.28125
|
||||
z
|
||||
" id="DejaVuSans-67"/>
|
||||
<path d="M 9.421875 75.984375
|
||||
L 18.40625 75.984375
|
||||
L 18.40625 0
|
||||
L 9.421875 0
|
||||
z
|
||||
" id="DejaVuSans-108"/>
|
||||
<path d="M 9.421875 54.6875
|
||||
L 18.40625 54.6875
|
||||
L 18.40625 0
|
||||
L 9.421875 0
|
||||
z
|
||||
M 9.421875 75.984375
|
||||
L 18.40625 75.984375
|
||||
L 18.40625 64.59375
|
||||
L 9.421875 64.59375
|
||||
z
|
||||
" id="DejaVuSans-105"/>
|
||||
<path d="M 56.203125 29.59375
|
||||
L 56.203125 25.203125
|
||||
L 14.890625 25.203125
|
||||
Q 15.484375 15.921875 20.484375 11.0625
|
||||
Q 25.484375 6.203125 34.421875 6.203125
|
||||
Q 39.59375 6.203125 44.453125 7.46875
|
||||
Q 49.3125 8.734375 54.109375 11.28125
|
||||
L 54.109375 2.78125
|
||||
Q 49.265625 0.734375 44.1875 -0.34375
|
||||
Q 39.109375 -1.421875 33.890625 -1.421875
|
||||
Q 20.796875 -1.421875 13.15625 6.1875
|
||||
Q 5.515625 13.8125 5.515625 26.8125
|
||||
Q 5.515625 40.234375 12.765625 48.109375
|
||||
Q 20.015625 56 32.328125 56
|
||||
Q 43.359375 56 49.78125 48.890625
|
||||
Q 56.203125 41.796875 56.203125 29.59375
|
||||
z
|
||||
M 47.21875 32.234375
|
||||
Q 47.125 39.59375 43.09375 43.984375
|
||||
Q 39.0625 48.390625 32.421875 48.390625
|
||||
Q 24.90625 48.390625 20.390625 44.140625
|
||||
Q 15.875 39.890625 15.1875 32.171875
|
||||
z
|
||||
" id="DejaVuSans-101"/>
|
||||
<path d="M 54.890625 33.015625
|
||||
L 54.890625 0
|
||||
L 45.90625 0
|
||||
L 45.90625 32.71875
|
||||
Q 45.90625 40.484375 42.875 44.328125
|
||||
Q 39.84375 48.1875 33.796875 48.1875
|
||||
Q 26.515625 48.1875 22.3125 43.546875
|
||||
Q 18.109375 38.921875 18.109375 30.90625
|
||||
L 18.109375 0
|
||||
L 9.078125 0
|
||||
L 9.078125 54.6875
|
||||
L 18.109375 54.6875
|
||||
L 18.109375 46.1875
|
||||
Q 21.34375 51.125 25.703125 53.5625
|
||||
Q 30.078125 56 35.796875 56
|
||||
Q 45.21875 56 50.046875 50.171875
|
||||
Q 54.890625 44.34375 54.890625 33.015625
|
||||
z
|
||||
" id="DejaVuSans-110"/>
|
||||
<path d="M 18.3125 70.21875
|
||||
L 18.3125 54.6875
|
||||
L 36.8125 54.6875
|
||||
L 36.8125 47.703125
|
||||
L 18.3125 47.703125
|
||||
L 18.3125 18.015625
|
||||
Q 18.3125 11.328125 20.140625 9.421875
|
||||
Q 21.96875 7.515625 27.59375 7.515625
|
||||
L 36.8125 7.515625
|
||||
L 36.8125 0
|
||||
L 27.59375 0
|
||||
Q 17.1875 0 13.234375 3.875
|
||||
Q 9.28125 7.765625 9.28125 18.015625
|
||||
L 9.28125 47.703125
|
||||
L 2.6875 47.703125
|
||||
L 2.6875 54.6875
|
||||
L 9.28125 54.6875
|
||||
L 9.28125 70.21875
|
||||
z
|
||||
" id="DejaVuSans-116"/>
|
||||
<path id="DejaVuSans-32"/>
|
||||
<path d="M 19.671875 34.8125
|
||||
L 19.671875 8.109375
|
||||
L 35.5 8.109375
|
||||
Q 43.453125 8.109375 47.28125 11.40625
|
||||
Q 51.125 14.703125 51.125 21.484375
|
||||
Q 51.125 28.328125 47.28125 31.5625
|
||||
Q 43.453125 34.8125 35.5 34.8125
|
||||
z
|
||||
M 19.671875 64.796875
|
||||
L 19.671875 42.828125
|
||||
L 34.28125 42.828125
|
||||
Q 41.5 42.828125 45.03125 45.53125
|
||||
Q 48.578125 48.25 48.578125 53.8125
|
||||
Q 48.578125 59.328125 45.03125 62.0625
|
||||
Q 41.5 64.796875 34.28125 64.796875
|
||||
z
|
||||
M 9.8125 72.90625
|
||||
L 35.015625 72.90625
|
||||
Q 46.296875 72.90625 52.390625 68.21875
|
||||
Q 58.5 63.53125 58.5 54.890625
|
||||
Q 58.5 48.1875 55.375 44.234375
|
||||
Q 52.25 40.28125 46.1875 39.3125
|
||||
Q 53.46875 37.75 57.5 32.78125
|
||||
Q 61.53125 27.828125 61.53125 20.40625
|
||||
Q 61.53125 10.640625 54.890625 5.3125
|
||||
Q 48.25 0 35.984375 0
|
||||
L 9.8125 0
|
||||
z
|
||||
" id="DejaVuSans-66"/>
|
||||
<path d="M 34.28125 27.484375
|
||||
Q 23.390625 27.484375 19.1875 25
|
||||
Q 14.984375 22.515625 14.984375 16.5
|
||||
Q 14.984375 11.71875 18.140625 8.90625
|
||||
Q 21.296875 6.109375 26.703125 6.109375
|
||||
Q 34.1875 6.109375 38.703125 11.40625
|
||||
Q 43.21875 16.703125 43.21875 25.484375
|
||||
L 43.21875 27.484375
|
||||
z
|
||||
M 52.203125 31.203125
|
||||
L 52.203125 0
|
||||
L 43.21875 0
|
||||
L 43.21875 8.296875
|
||||
Q 40.140625 3.328125 35.546875 0.953125
|
||||
Q 30.953125 -1.421875 24.3125 -1.421875
|
||||
Q 15.921875 -1.421875 10.953125 3.296875
|
||||
Q 6 8.015625 6 15.921875
|
||||
Q 6 25.140625 12.171875 29.828125
|
||||
Q 18.359375 34.515625 30.609375 34.515625
|
||||
L 43.21875 34.515625
|
||||
L 43.21875 35.40625
|
||||
Q 43.21875 41.609375 39.140625 45
|
||||
Q 35.0625 48.390625 27.6875 48.390625
|
||||
Q 23 48.390625 18.546875 47.265625
|
||||
Q 14.109375 46.140625 10.015625 43.890625
|
||||
L 10.015625 52.203125
|
||||
Q 14.9375 54.109375 19.578125 55.046875
|
||||
Q 24.21875 56 28.609375 56
|
||||
Q 40.484375 56 46.34375 49.84375
|
||||
Q 52.203125 43.703125 52.203125 31.203125
|
||||
z
|
||||
" id="DejaVuSans-97"/>
|
||||
<path d="M 48.78125 52.59375
|
||||
L 48.78125 44.1875
|
||||
Q 44.96875 46.296875 41.140625 47.34375
|
||||
Q 37.3125 48.390625 33.40625 48.390625
|
||||
Q 24.65625 48.390625 19.8125 42.84375
|
||||
Q 14.984375 37.3125 14.984375 27.296875
|
||||
Q 14.984375 17.28125 19.8125 11.734375
|
||||
Q 24.65625 6.203125 33.40625 6.203125
|
||||
Q 37.3125 6.203125 41.140625 7.25
|
||||
Q 44.96875 8.296875 48.78125 10.40625
|
||||
L 48.78125 2.09375
|
||||
Q 45.015625 0.34375 40.984375 -0.53125
|
||||
Q 36.96875 -1.421875 32.421875 -1.421875
|
||||
Q 20.0625 -1.421875 12.78125 6.34375
|
||||
Q 5.515625 14.109375 5.515625 27.296875
|
||||
Q 5.515625 40.671875 12.859375 48.328125
|
||||
Q 20.21875 56 33.015625 56
|
||||
Q 37.15625 56 41.109375 55.140625
|
||||
Q 45.0625 54.296875 48.78125 52.59375
|
||||
z
|
||||
" id="DejaVuSans-99"/>
|
||||
<path d="M 54.890625 33.015625
|
||||
L 54.890625 0
|
||||
L 45.90625 0
|
||||
L 45.90625 32.71875
|
||||
Q 45.90625 40.484375 42.875 44.328125
|
||||
Q 39.84375 48.1875 33.796875 48.1875
|
||||
Q 26.515625 48.1875 22.3125 43.546875
|
||||
Q 18.109375 38.921875 18.109375 30.90625
|
||||
L 18.109375 0
|
||||
L 9.078125 0
|
||||
L 9.078125 75.984375
|
||||
L 18.109375 75.984375
|
||||
L 18.109375 46.1875
|
||||
Q 21.34375 51.125 25.703125 53.5625
|
||||
Q 30.078125 56 35.796875 56
|
||||
Q 45.21875 56 50.046875 50.171875
|
||||
Q 54.890625 44.34375 54.890625 33.015625
|
||||
z
|
||||
" id="DejaVuSans-104"/>
|
||||
<path d="M 53.515625 70.515625
|
||||
L 53.515625 60.890625
|
||||
Q 47.90625 63.578125 42.921875 64.890625
|
||||
Q 37.9375 66.21875 33.296875 66.21875
|
||||
Q 25.25 66.21875 20.875 63.09375
|
||||
Q 16.5 59.96875 16.5 54.203125
|
||||
Q 16.5 49.359375 19.40625 46.890625
|
||||
Q 22.3125 44.4375 30.421875 42.921875
|
||||
L 36.375 41.703125
|
||||
Q 47.40625 39.59375 52.65625 34.296875
|
||||
Q 57.90625 29 57.90625 20.125
|
||||
Q 57.90625 9.515625 50.796875 4.046875
|
||||
Q 43.703125 -1.421875 29.984375 -1.421875
|
||||
Q 24.8125 -1.421875 18.96875 -0.25
|
||||
Q 13.140625 0.921875 6.890625 3.21875
|
||||
L 6.890625 13.375
|
||||
Q 12.890625 10.015625 18.65625 8.296875
|
||||
Q 24.421875 6.59375 29.984375 6.59375
|
||||
Q 38.421875 6.59375 43.015625 9.90625
|
||||
Q 47.609375 13.234375 47.609375 19.390625
|
||||
Q 47.609375 24.75 44.3125 27.78125
|
||||
Q 41.015625 30.8125 33.5 32.328125
|
||||
L 27.484375 33.5
|
||||
Q 16.453125 35.6875 11.515625 40.375
|
||||
Q 6.59375 45.0625 6.59375 53.421875
|
||||
Q 6.59375 63.09375 13.40625 68.65625
|
||||
Q 20.21875 74.21875 32.171875 74.21875
|
||||
Q 37.3125 74.21875 42.625 73.28125
|
||||
Q 47.953125 72.359375 53.515625 70.515625
|
||||
z
|
||||
" id="DejaVuSans-83"/>
|
||||
<path d="M 5.515625 54.6875
|
||||
L 48.1875 54.6875
|
||||
L 48.1875 46.484375
|
||||
L 14.40625 7.171875
|
||||
L 48.1875 7.171875
|
||||
L 48.1875 0
|
||||
L 4.296875 0
|
||||
L 4.296875 8.203125
|
||||
L 38.09375 47.515625
|
||||
L 5.515625 47.515625
|
||||
z
|
||||
" id="DejaVuSans-122"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-67"/>
|
||||
<use x="69.824219" xlink:href="#DejaVuSans-108"/>
|
||||
<use x="97.607422" xlink:href="#DejaVuSans-105"/>
|
||||
<use x="125.390625" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="186.914062" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="250.292969" xlink:href="#DejaVuSans-116"/>
|
||||
<use x="289.501953" xlink:href="#DejaVuSans-32"/>
|
||||
<use x="321.289062" xlink:href="#DejaVuSans-66"/>
|
||||
<use x="389.892578" xlink:href="#DejaVuSans-97"/>
|
||||
<use x="451.171875" xlink:href="#DejaVuSans-116"/>
|
||||
<use x="490.380859" xlink:href="#DejaVuSans-99"/>
|
||||
<use x="545.361328" xlink:href="#DejaVuSans-104"/>
|
||||
<use x="608.740234" xlink:href="#DejaVuSans-32"/>
|
||||
<use x="640.527344" xlink:href="#DejaVuSans-83"/>
|
||||
<use x="704.003906" xlink:href="#DejaVuSans-105"/>
|
||||
<use x="731.787109" xlink:href="#DejaVuSans-122"/>
|
||||
<use x="784.277344" xlink:href="#DejaVuSans-101"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="matplotlib.axis_2">
|
||||
<g id="ytick_1">
|
||||
<g id="line2d_9">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 60.30875 288.430125
|
||||
L 417.42875 288.430125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_10">
|
||||
<!-- 0 -->
|
||||
<g style="fill:#262626;" transform="translate(43.81 292.609266)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 31.78125 66.40625
|
||||
Q 24.171875 66.40625 20.328125 58.90625
|
||||
Q 16.5 51.421875 16.5 36.375
|
||||
Q 16.5 21.390625 20.328125 13.890625
|
||||
Q 24.171875 6.390625 31.78125 6.390625
|
||||
Q 39.453125 6.390625 43.28125 13.890625
|
||||
Q 47.125 21.390625 47.125 36.375
|
||||
Q 47.125 51.421875 43.28125 58.90625
|
||||
Q 39.453125 66.40625 31.78125 66.40625
|
||||
z
|
||||
M 31.78125 74.21875
|
||||
Q 44.046875 74.21875 50.515625 64.515625
|
||||
Q 56.984375 54.828125 56.984375 36.375
|
||||
Q 56.984375 17.96875 50.515625 8.265625
|
||||
Q 44.046875 -1.421875 31.78125 -1.421875
|
||||
Q 19.53125 -1.421875 13.0625 8.265625
|
||||
Q 6.59375 17.96875 6.59375 36.375
|
||||
Q 6.59375 54.828125 13.0625 64.515625
|
||||
Q 19.53125 74.21875 31.78125 74.21875
|
||||
z
|
||||
" id="DejaVuSans-48"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_2">
|
||||
<g id="line2d_10">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 60.30875 241.763458
|
||||
L 417.42875 241.763458
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_11">
|
||||
<!-- 200 -->
|
||||
<g style="fill:#262626;" transform="translate(29.8125 245.942599)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-50"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
<use x="127.246094" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_3">
|
||||
<g id="line2d_11">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 60.30875 195.096792
|
||||
L 417.42875 195.096792
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_12">
|
||||
<!-- 400 -->
|
||||
<g style="fill:#262626;" transform="translate(29.8125 199.275932)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-52"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
<use x="127.246094" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_4">
|
||||
<g id="line2d_12">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 60.30875 148.430125
|
||||
L 417.42875 148.430125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_13">
|
||||
<!-- 600 -->
|
||||
<g style="fill:#262626;" transform="translate(29.8125 152.609266)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-54"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
<use x="127.246094" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_5">
|
||||
<g id="line2d_13">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 60.30875 101.763458
|
||||
L 417.42875 101.763458
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_14">
|
||||
<!-- 800 -->
|
||||
<g style="fill:#262626;" transform="translate(29.8125 105.942599)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-56"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
<use x="127.246094" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_6">
|
||||
<g id="line2d_14">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 60.30875 55.096792
|
||||
L 417.42875 55.096792
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_15">
|
||||
<!-- 1000 -->
|
||||
<g style="fill:#262626;" transform="translate(22.81375 59.275932)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-49"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
<use x="127.246094" xlink:href="#DejaVuSans-48"/>
|
||||
<use x="190.869141" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="text_16">
|
||||
<!-- Inferences/second -->
|
||||
<g style="fill:#262626;" transform="translate(16.318125 210.113812)rotate(-90)scale(0.12 -0.12)">
|
||||
<defs>
|
||||
<path d="M 9.8125 72.90625
|
||||
L 19.671875 72.90625
|
||||
L 19.671875 0
|
||||
L 9.8125 0
|
||||
z
|
||||
" id="DejaVuSans-73"/>
|
||||
<path d="M 37.109375 75.984375
|
||||
L 37.109375 68.5
|
||||
L 28.515625 68.5
|
||||
Q 23.6875 68.5 21.796875 66.546875
|
||||
Q 19.921875 64.59375 19.921875 59.515625
|
||||
L 19.921875 54.6875
|
||||
L 34.71875 54.6875
|
||||
L 34.71875 47.703125
|
||||
L 19.921875 47.703125
|
||||
L 19.921875 0
|
||||
L 10.890625 0
|
||||
L 10.890625 47.703125
|
||||
L 2.296875 47.703125
|
||||
L 2.296875 54.6875
|
||||
L 10.890625 54.6875
|
||||
L 10.890625 58.5
|
||||
Q 10.890625 67.625 15.140625 71.796875
|
||||
Q 19.390625 75.984375 28.609375 75.984375
|
||||
z
|
||||
" id="DejaVuSans-102"/>
|
||||
<path d="M 41.109375 46.296875
|
||||
Q 39.59375 47.171875 37.8125 47.578125
|
||||
Q 36.03125 48 33.890625 48
|
||||
Q 26.265625 48 22.1875 43.046875
|
||||
Q 18.109375 38.09375 18.109375 28.8125
|
||||
L 18.109375 0
|
||||
L 9.078125 0
|
||||
L 9.078125 54.6875
|
||||
L 18.109375 54.6875
|
||||
L 18.109375 46.1875
|
||||
Q 20.953125 51.171875 25.484375 53.578125
|
||||
Q 30.03125 56 36.53125 56
|
||||
Q 37.453125 56 38.578125 55.875
|
||||
Q 39.703125 55.765625 41.0625 55.515625
|
||||
z
|
||||
" id="DejaVuSans-114"/>
|
||||
<path d="M 44.28125 53.078125
|
||||
L 44.28125 44.578125
|
||||
Q 40.484375 46.53125 36.375 47.5
|
||||
Q 32.28125 48.484375 27.875 48.484375
|
||||
Q 21.1875 48.484375 17.84375 46.4375
|
||||
Q 14.5 44.390625 14.5 40.28125
|
||||
Q 14.5 37.15625 16.890625 35.375
|
||||
Q 19.28125 33.59375 26.515625 31.984375
|
||||
L 29.59375 31.296875
|
||||
Q 39.15625 29.25 43.1875 25.515625
|
||||
Q 47.21875 21.78125 47.21875 15.09375
|
||||
Q 47.21875 7.46875 41.1875 3.015625
|
||||
Q 35.15625 -1.421875 24.609375 -1.421875
|
||||
Q 20.21875 -1.421875 15.453125 -0.5625
|
||||
Q 10.6875 0.296875 5.421875 2
|
||||
L 5.421875 11.28125
|
||||
Q 10.40625 8.6875 15.234375 7.390625
|
||||
Q 20.0625 6.109375 24.8125 6.109375
|
||||
Q 31.15625 6.109375 34.5625 8.28125
|
||||
Q 37.984375 10.453125 37.984375 14.40625
|
||||
Q 37.984375 18.0625 35.515625 20.015625
|
||||
Q 33.0625 21.96875 24.703125 23.78125
|
||||
L 21.578125 24.515625
|
||||
Q 13.234375 26.265625 9.515625 29.90625
|
||||
Q 5.8125 33.546875 5.8125 39.890625
|
||||
Q 5.8125 47.609375 11.28125 51.796875
|
||||
Q 16.75 56 26.8125 56
|
||||
Q 31.78125 56 36.171875 55.265625
|
||||
Q 40.578125 54.546875 44.28125 53.078125
|
||||
z
|
||||
" id="DejaVuSans-115"/>
|
||||
<path d="M 25.390625 72.90625
|
||||
L 33.6875 72.90625
|
||||
L 8.296875 -9.28125
|
||||
L 0 -9.28125
|
||||
z
|
||||
" id="DejaVuSans-47"/>
|
||||
<path d="M 30.609375 48.390625
|
||||
Q 23.390625 48.390625 19.1875 42.75
|
||||
Q 14.984375 37.109375 14.984375 27.296875
|
||||
Q 14.984375 17.484375 19.15625 11.84375
|
||||
Q 23.34375 6.203125 30.609375 6.203125
|
||||
Q 37.796875 6.203125 41.984375 11.859375
|
||||
Q 46.1875 17.53125 46.1875 27.296875
|
||||
Q 46.1875 37.015625 41.984375 42.703125
|
||||
Q 37.796875 48.390625 30.609375 48.390625
|
||||
z
|
||||
M 30.609375 56
|
||||
Q 42.328125 56 49.015625 48.375
|
||||
Q 55.71875 40.765625 55.71875 27.296875
|
||||
Q 55.71875 13.875 49.015625 6.21875
|
||||
Q 42.328125 -1.421875 30.609375 -1.421875
|
||||
Q 18.84375 -1.421875 12.171875 6.21875
|
||||
Q 5.515625 13.875 5.515625 27.296875
|
||||
Q 5.515625 40.765625 12.171875 48.375
|
||||
Q 18.84375 56 30.609375 56
|
||||
z
|
||||
" id="DejaVuSans-111"/>
|
||||
<path d="M 45.40625 46.390625
|
||||
L 45.40625 75.984375
|
||||
L 54.390625 75.984375
|
||||
L 54.390625 0
|
||||
L 45.40625 0
|
||||
L 45.40625 8.203125
|
||||
Q 42.578125 3.328125 38.25 0.953125
|
||||
Q 33.9375 -1.421875 27.875 -1.421875
|
||||
Q 17.96875 -1.421875 11.734375 6.484375
|
||||
Q 5.515625 14.40625 5.515625 27.296875
|
||||
Q 5.515625 40.1875 11.734375 48.09375
|
||||
Q 17.96875 56 27.875 56
|
||||
Q 33.9375 56 38.25 53.625
|
||||
Q 42.578125 51.265625 45.40625 46.390625
|
||||
z
|
||||
M 14.796875 27.296875
|
||||
Q 14.796875 17.390625 18.875 11.75
|
||||
Q 22.953125 6.109375 30.078125 6.109375
|
||||
Q 37.203125 6.109375 41.296875 11.75
|
||||
Q 45.40625 17.390625 45.40625 27.296875
|
||||
Q 45.40625 37.203125 41.296875 42.84375
|
||||
Q 37.203125 48.484375 30.078125 48.484375
|
||||
Q 22.953125 48.484375 18.875 42.84375
|
||||
Q 14.796875 37.203125 14.796875 27.296875
|
||||
z
|
||||
" id="DejaVuSans-100"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-73"/>
|
||||
<use x="29.492188" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="92.871094" xlink:href="#DejaVuSans-102"/>
|
||||
<use x="128.076172" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="189.599609" xlink:href="#DejaVuSans-114"/>
|
||||
<use x="228.462891" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="289.986328" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="353.365234" xlink:href="#DejaVuSans-99"/>
|
||||
<use x="408.345703" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="469.869141" xlink:href="#DejaVuSans-115"/>
|
||||
<use x="521.96875" xlink:href="#DejaVuSans-47"/>
|
||||
<use x="555.660156" xlink:href="#DejaVuSans-115"/>
|
||||
<use x="607.759766" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="669.283203" xlink:href="#DejaVuSans-99"/>
|
||||
<use x="724.263672" xlink:href="#DejaVuSans-111"/>
|
||||
<use x="785.445312" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="848.824219" xlink:href="#DejaVuSans-100"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="line2d_15">
|
||||
<path clip-path="url(#p0d91672b8f)" d="M 76.541477 228.790125
|
||||
L 122.920698 190.616792
|
||||
L 169.299919 140.590125
|
||||
L 215.67914 86.456792
|
||||
L 262.05836 66.670125
|
||||
L 308.437581 89.070125
|
||||
L 354.816802 95.790125
|
||||
L 401.196023 124.163458
|
||||
" style="fill:none;stroke:#0173b2;stroke-linecap:round;stroke-width:1.5;"/>
|
||||
<defs>
|
||||
<path d="M 0 3
|
||||
C 0.795609 3 1.55874 2.683901 2.12132 2.12132
|
||||
C 2.683901 1.55874 3 0.795609 3 0
|
||||
C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132
|
||||
C 1.55874 -2.683901 0.795609 -3 0 -3
|
||||
C -0.795609 -3 -1.55874 -2.683901 -2.12132 -2.12132
|
||||
C -2.683901 -1.55874 -3 -0.795609 -3 0
|
||||
C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132
|
||||
C -1.55874 2.683901 -0.795609 3 0 3
|
||||
z
|
||||
" id="mfc2dfc2535" style="stroke:#ffffff;stroke-width:0.75;"/>
|
||||
</defs>
|
||||
<g clip-path="url(#p0d91672b8f)">
|
||||
<use style="fill:#0173b2;stroke:#ffffff;stroke-width:0.75;" x="76.541477" xlink:href="#mfc2dfc2535" y="228.790125"/>
|
||||
<use style="fill:#0173b2;stroke:#ffffff;stroke-width:0.75;" x="122.920698" xlink:href="#mfc2dfc2535" y="190.616792"/>
|
||||
<use style="fill:#0173b2;stroke:#ffffff;stroke-width:0.75;" x="169.299919" xlink:href="#mfc2dfc2535" y="140.590125"/>
|
||||
<use style="fill:#0173b2;stroke:#ffffff;stroke-width:0.75;" x="215.67914" xlink:href="#mfc2dfc2535" y="86.456792"/>
|
||||
<use style="fill:#0173b2;stroke:#ffffff;stroke-width:0.75;" x="262.05836" xlink:href="#mfc2dfc2535" y="66.670125"/>
|
||||
<use style="fill:#0173b2;stroke:#ffffff;stroke-width:0.75;" x="308.437581" xlink:href="#mfc2dfc2535" y="89.070125"/>
|
||||
<use style="fill:#0173b2;stroke:#ffffff;stroke-width:0.75;" x="354.816802" xlink:href="#mfc2dfc2535" y="95.790125"/>
|
||||
<use style="fill:#0173b2;stroke:#ffffff;stroke-width:0.75;" x="401.196023" xlink:href="#mfc2dfc2535" y="124.163458"/>
|
||||
</g>
|
||||
</g>
|
||||
<g id="line2d_16"/>
|
||||
<g id="line2d_17"/>
|
||||
<g id="patch_3">
|
||||
<path d="M 60.30875 288.430125
|
||||
L 60.30875 22.318125
|
||||
" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:2;"/>
|
||||
</g>
|
||||
<g id="patch_4">
|
||||
<path d="M 417.42875 288.430125
|
||||
L 417.42875 22.318125
|
||||
" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:2;"/>
|
||||
</g>
|
||||
<g id="patch_5">
|
||||
<path d="M 60.30875 288.430125
|
||||
L 417.42875 288.430125
|
||||
" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:2;"/>
|
||||
</g>
|
||||
<g id="patch_6">
|
||||
<path d="M 60.30875 22.318125
|
||||
L 417.42875 22.318125
|
||||
" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:2;"/>
|
||||
</g>
|
||||
<g id="text_17">
|
||||
<!-- Performance offline -->
|
||||
<g style="fill:#262626;" transform="translate(180.219688 16.318125)scale(0.12 -0.12)">
|
||||
<defs>
|
||||
<path d="M 19.671875 64.796875
|
||||
L 19.671875 37.40625
|
||||
L 32.078125 37.40625
|
||||
Q 38.96875 37.40625 42.71875 40.96875
|
||||
Q 46.484375 44.53125 46.484375 51.125
|
||||
Q 46.484375 57.671875 42.71875 61.234375
|
||||
Q 38.96875 64.796875 32.078125 64.796875
|
||||
z
|
||||
M 9.8125 72.90625
|
||||
L 32.078125 72.90625
|
||||
Q 44.34375 72.90625 50.609375 67.359375
|
||||
Q 56.890625 61.8125 56.890625 51.125
|
||||
Q 56.890625 40.328125 50.609375 34.8125
|
||||
Q 44.34375 29.296875 32.078125 29.296875
|
||||
L 19.671875 29.296875
|
||||
L 19.671875 0
|
||||
L 9.8125 0
|
||||
z
|
||||
" id="DejaVuSans-80"/>
|
||||
<path d="M 52 44.1875
|
||||
Q 55.375 50.25 60.0625 53.125
|
||||
Q 64.75 56 71.09375 56
|
||||
Q 79.640625 56 84.28125 50.015625
|
||||
Q 88.921875 44.046875 88.921875 33.015625
|
||||
L 88.921875 0
|
||||
L 79.890625 0
|
||||
L 79.890625 32.71875
|
||||
Q 79.890625 40.578125 77.09375 44.375
|
||||
Q 74.3125 48.1875 68.609375 48.1875
|
||||
Q 61.625 48.1875 57.5625 43.546875
|
||||
Q 53.515625 38.921875 53.515625 30.90625
|
||||
L 53.515625 0
|
||||
L 44.484375 0
|
||||
L 44.484375 32.71875
|
||||
Q 44.484375 40.625 41.703125 44.40625
|
||||
Q 38.921875 48.1875 33.109375 48.1875
|
||||
Q 26.21875 48.1875 22.15625 43.53125
|
||||
Q 18.109375 38.875 18.109375 30.90625
|
||||
L 18.109375 0
|
||||
L 9.078125 0
|
||||
L 9.078125 54.6875
|
||||
L 18.109375 54.6875
|
||||
L 18.109375 46.1875
|
||||
Q 21.1875 51.21875 25.484375 53.609375
|
||||
Q 29.78125 56 35.6875 56
|
||||
Q 41.65625 56 45.828125 52.96875
|
||||
Q 50 49.953125 52 44.1875
|
||||
z
|
||||
" id="DejaVuSans-109"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-80"/>
|
||||
<use x="56.677734" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="118.201172" xlink:href="#DejaVuSans-114"/>
|
||||
<use x="159.314453" xlink:href="#DejaVuSans-102"/>
|
||||
<use x="194.519531" xlink:href="#DejaVuSans-111"/>
|
||||
<use x="255.701172" xlink:href="#DejaVuSans-114"/>
|
||||
<use x="295.064453" xlink:href="#DejaVuSans-109"/>
|
||||
<use x="392.476562" xlink:href="#DejaVuSans-97"/>
|
||||
<use x="453.755859" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="517.134766" xlink:href="#DejaVuSans-99"/>
|
||||
<use x="572.115234" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="633.638672" xlink:href="#DejaVuSans-32"/>
|
||||
<use x="665.425781" xlink:href="#DejaVuSans-111"/>
|
||||
<use x="726.607422" xlink:href="#DejaVuSans-102"/>
|
||||
<use x="761.8125" xlink:href="#DejaVuSans-102"/>
|
||||
<use x="797.017578" xlink:href="#DejaVuSans-108"/>
|
||||
<use x="824.800781" xlink:href="#DejaVuSans-105"/>
|
||||
<use x="852.583984" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="915.962891" xlink:href="#DejaVuSans-101"/>
|
||||
</g>
|
||||
</g>
|
||||
<g id="legend_1"/>
|
||||
</g>
|
||||
</g>
|
||||
<defs>
|
||||
<clipPath id="p0d91672b8f">
|
||||
<rect height="266.112" width="357.12" x="60.30875" y="22.318125"/>
|
||||
</clipPath>
|
||||
</defs>
|
||||
</svg>
|
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 31 KiB |
After Width: | Height: | Size: 34 KiB |
|
@ -0,0 +1,980 @@
|
|||
<?xml version="1.0" encoding="utf-8" standalone="no"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<!-- Created with matplotlib (https://matplotlib.org/) -->
|
||||
<svg height="331.389812pt" version="1.1" viewBox="0 0 417.63 331.389812" width="417.63pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<metadata>
|
||||
<rdf:RDF xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
||||
<cc:Work>
|
||||
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
|
||||
<dc:date>2021-04-15T15:15:18.496826</dc:date>
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:creator>
|
||||
<cc:Agent>
|
||||
<dc:title>Matplotlib v3.3.4, https://matplotlib.org/</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:creator>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<defs>
|
||||
<style type="text/css">*{stroke-linecap:butt;stroke-linejoin:round;}</style>
|
||||
</defs>
|
||||
<g id="figure_1">
|
||||
<g id="patch_1">
|
||||
<path d="M 0 331.389812
|
||||
L 417.63 331.389812
|
||||
L 417.63 0
|
||||
L 0 0
|
||||
z
|
||||
" style="fill:#ffffff;"/>
|
||||
</g>
|
||||
<g id="axes_1">
|
||||
<g id="patch_2">
|
||||
<path d="M 53.31 288.430125
|
||||
L 410.43 288.430125
|
||||
L 410.43 22.318125
|
||||
L 53.31 22.318125
|
||||
z
|
||||
" style="fill:#ffffff;"/>
|
||||
</g>
|
||||
<g id="matplotlib.axis_1">
|
||||
<g id="xtick_1">
|
||||
<g id="text_1">
|
||||
<!-- 1 -->
|
||||
<g style="fill:#262626;" transform="translate(72.130625 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 12.40625 8.296875
|
||||
L 28.515625 8.296875
|
||||
L 28.515625 63.921875
|
||||
L 10.984375 60.40625
|
||||
L 10.984375 69.390625
|
||||
L 28.421875 72.90625
|
||||
L 38.28125 72.90625
|
||||
L 38.28125 8.296875
|
||||
L 54.390625 8.296875
|
||||
L 54.390625 0
|
||||
L 12.40625 0
|
||||
z
|
||||
" id="DejaVuSans-49"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-49"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_2">
|
||||
<g id="text_2">
|
||||
<!-- 2 -->
|
||||
<g style="fill:#262626;" transform="translate(116.770625 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 19.1875 8.296875
|
||||
L 53.609375 8.296875
|
||||
L 53.609375 0
|
||||
L 7.328125 0
|
||||
L 7.328125 8.296875
|
||||
Q 12.9375 14.109375 22.625 23.890625
|
||||
Q 32.328125 33.6875 34.8125 36.53125
|
||||
Q 39.546875 41.84375 41.421875 45.53125
|
||||
Q 43.3125 49.21875 43.3125 52.78125
|
||||
Q 43.3125 58.59375 39.234375 62.25
|
||||
Q 35.15625 65.921875 28.609375 65.921875
|
||||
Q 23.96875 65.921875 18.8125 64.3125
|
||||
Q 13.671875 62.703125 7.8125 59.421875
|
||||
L 7.8125 69.390625
|
||||
Q 13.765625 71.78125 18.9375 73
|
||||
Q 24.125 74.21875 28.421875 74.21875
|
||||
Q 39.75 74.21875 46.484375 68.546875
|
||||
Q 53.21875 62.890625 53.21875 53.421875
|
||||
Q 53.21875 48.921875 51.53125 44.890625
|
||||
Q 49.859375 40.875 45.40625 35.40625
|
||||
Q 44.1875 33.984375 37.640625 27.21875
|
||||
Q 31.109375 20.453125 19.1875 8.296875
|
||||
z
|
||||
" id="DejaVuSans-50"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-50"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_3">
|
||||
<g id="text_3">
|
||||
<!-- 4 -->
|
||||
<g style="fill:#262626;" transform="translate(161.410625 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 37.796875 64.3125
|
||||
L 12.890625 25.390625
|
||||
L 37.796875 25.390625
|
||||
z
|
||||
M 35.203125 72.90625
|
||||
L 47.609375 72.90625
|
||||
L 47.609375 25.390625
|
||||
L 58.015625 25.390625
|
||||
L 58.015625 17.1875
|
||||
L 47.609375 17.1875
|
||||
L 47.609375 0
|
||||
L 37.796875 0
|
||||
L 37.796875 17.1875
|
||||
L 4.890625 17.1875
|
||||
L 4.890625 26.703125
|
||||
z
|
||||
" id="DejaVuSans-52"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-52"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_4">
|
||||
<g id="text_4">
|
||||
<!-- 8 -->
|
||||
<g style="fill:#262626;" transform="translate(206.050625 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 31.78125 34.625
|
||||
Q 24.75 34.625 20.71875 30.859375
|
||||
Q 16.703125 27.09375 16.703125 20.515625
|
||||
Q 16.703125 13.921875 20.71875 10.15625
|
||||
Q 24.75 6.390625 31.78125 6.390625
|
||||
Q 38.8125 6.390625 42.859375 10.171875
|
||||
Q 46.921875 13.96875 46.921875 20.515625
|
||||
Q 46.921875 27.09375 42.890625 30.859375
|
||||
Q 38.875 34.625 31.78125 34.625
|
||||
z
|
||||
M 21.921875 38.8125
|
||||
Q 15.578125 40.375 12.03125 44.71875
|
||||
Q 8.5 49.078125 8.5 55.328125
|
||||
Q 8.5 64.0625 14.71875 69.140625
|
||||
Q 20.953125 74.21875 31.78125 74.21875
|
||||
Q 42.671875 74.21875 48.875 69.140625
|
||||
Q 55.078125 64.0625 55.078125 55.328125
|
||||
Q 55.078125 49.078125 51.53125 44.71875
|
||||
Q 48 40.375 41.703125 38.8125
|
||||
Q 48.828125 37.15625 52.796875 32.3125
|
||||
Q 56.78125 27.484375 56.78125 20.515625
|
||||
Q 56.78125 9.90625 50.3125 4.234375
|
||||
Q 43.84375 -1.421875 31.78125 -1.421875
|
||||
Q 19.734375 -1.421875 13.25 4.234375
|
||||
Q 6.78125 9.90625 6.78125 20.515625
|
||||
Q 6.78125 27.484375 10.78125 32.3125
|
||||
Q 14.796875 37.15625 21.921875 38.8125
|
||||
z
|
||||
M 18.3125 54.390625
|
||||
Q 18.3125 48.734375 21.84375 45.5625
|
||||
Q 25.390625 42.390625 31.78125 42.390625
|
||||
Q 38.140625 42.390625 41.71875 45.5625
|
||||
Q 45.3125 48.734375 45.3125 54.390625
|
||||
Q 45.3125 60.0625 41.71875 63.234375
|
||||
Q 38.140625 66.40625 31.78125 66.40625
|
||||
Q 25.390625 66.40625 21.84375 63.234375
|
||||
Q 18.3125 60.0625 18.3125 54.390625
|
||||
z
|
||||
" id="DejaVuSans-56"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-56"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_5">
|
||||
<g id="text_5">
|
||||
<!-- 16 -->
|
||||
<g style="fill:#262626;" transform="translate(247.19125 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 33.015625 40.375
|
||||
Q 26.375 40.375 22.484375 35.828125
|
||||
Q 18.609375 31.296875 18.609375 23.390625
|
||||
Q 18.609375 15.53125 22.484375 10.953125
|
||||
Q 26.375 6.390625 33.015625 6.390625
|
||||
Q 39.65625 6.390625 43.53125 10.953125
|
||||
Q 47.40625 15.53125 47.40625 23.390625
|
||||
Q 47.40625 31.296875 43.53125 35.828125
|
||||
Q 39.65625 40.375 33.015625 40.375
|
||||
z
|
||||
M 52.59375 71.296875
|
||||
L 52.59375 62.3125
|
||||
Q 48.875 64.0625 45.09375 64.984375
|
||||
Q 41.3125 65.921875 37.59375 65.921875
|
||||
Q 27.828125 65.921875 22.671875 59.328125
|
||||
Q 17.53125 52.734375 16.796875 39.40625
|
||||
Q 19.671875 43.65625 24.015625 45.921875
|
||||
Q 28.375 48.1875 33.59375 48.1875
|
||||
Q 44.578125 48.1875 50.953125 41.515625
|
||||
Q 57.328125 34.859375 57.328125 23.390625
|
||||
Q 57.328125 12.15625 50.6875 5.359375
|
||||
Q 44.046875 -1.421875 33.015625 -1.421875
|
||||
Q 20.359375 -1.421875 13.671875 8.265625
|
||||
Q 6.984375 17.96875 6.984375 36.375
|
||||
Q 6.984375 53.65625 15.1875 63.9375
|
||||
Q 23.390625 74.21875 37.203125 74.21875
|
||||
Q 40.921875 74.21875 44.703125 73.484375
|
||||
Q 48.484375 72.75 52.59375 71.296875
|
||||
z
|
||||
" id="DejaVuSans-54"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-49"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-54"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_6">
|
||||
<g id="text_6">
|
||||
<!-- 32 -->
|
||||
<g style="fill:#262626;" transform="translate(291.83125 306.288406)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 40.578125 39.3125
|
||||
Q 47.65625 37.796875 51.625 33
|
||||
Q 55.609375 28.21875 55.609375 21.1875
|
||||
Q 55.609375 10.40625 48.1875 4.484375
|
||||
Q 40.765625 -1.421875 27.09375 -1.421875
|
||||
Q 22.515625 -1.421875 17.65625 -0.515625
|
||||
Q 12.796875 0.390625 7.625 2.203125
|
||||
L 7.625 11.71875
|
||||
Q 11.71875 9.328125 16.59375 8.109375
|
||||
Q 21.484375 6.890625 26.8125 6.890625
|
||||
Q 36.078125 6.890625 40.9375 10.546875
|
||||
Q 45.796875 14.203125 45.796875 21.1875
|
||||
Q 45.796875 27.640625 41.28125 31.265625
|
||||
Q 36.765625 34.90625 28.71875 34.90625
|
||||
L 20.21875 34.90625
|
||||
L 20.21875 43.015625
|
||||
L 29.109375 43.015625
|
||||
Q 36.375 43.015625 40.234375 45.921875
|
||||
Q 44.09375 48.828125 44.09375 54.296875
|
||||
Q 44.09375 59.90625 40.109375 62.90625
|
||||
Q 36.140625 65.921875 28.71875 65.921875
|
||||
Q 24.65625 65.921875 20.015625 65.03125
|
||||
Q 15.375 64.15625 9.8125 62.3125
|
||||
L 9.8125 71.09375
|
||||
Q 15.4375 72.65625 20.34375 73.4375
|
||||
Q 25.25 74.21875 29.59375 74.21875
|
||||
Q 40.828125 74.21875 47.359375 69.109375
|
||||
Q 53.90625 64.015625 53.90625 55.328125
|
||||
Q 53.90625 49.265625 50.4375 45.09375
|
||||
Q 46.96875 40.921875 40.578125 39.3125
|
||||
z
|
||||
" id="DejaVuSans-51"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-51"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-50"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_7">
|
||||
<g id="text_7">
|
||||
<!-- 64 -->
|
||||
<g style="fill:#262626;" transform="translate(336.47125 306.288406)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-54"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-52"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="xtick_8">
|
||||
<g id="text_8">
|
||||
<!-- 128 -->
|
||||
<g style="fill:#262626;" transform="translate(377.611875 306.288406)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-49"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-50"/>
|
||||
<use x="127.246094" xlink:href="#DejaVuSans-56"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="text_9">
|
||||
<!-- Client Batch Size -->
|
||||
<g style="fill:#262626;" transform="translate(181.122187 321.694187)scale(0.12 -0.12)">
|
||||
<defs>
|
||||
<path d="M 64.40625 67.28125
|
||||
L 64.40625 56.890625
|
||||
Q 59.421875 61.53125 53.78125 63.8125
|
||||
Q 48.140625 66.109375 41.796875 66.109375
|
||||
Q 29.296875 66.109375 22.65625 58.46875
|
||||
Q 16.015625 50.828125 16.015625 36.375
|
||||
Q 16.015625 21.96875 22.65625 14.328125
|
||||
Q 29.296875 6.6875 41.796875 6.6875
|
||||
Q 48.140625 6.6875 53.78125 8.984375
|
||||
Q 59.421875 11.28125 64.40625 15.921875
|
||||
L 64.40625 5.609375
|
||||
Q 59.234375 2.09375 53.4375 0.328125
|
||||
Q 47.65625 -1.421875 41.21875 -1.421875
|
||||
Q 24.65625 -1.421875 15.125 8.703125
|
||||
Q 5.609375 18.84375 5.609375 36.375
|
||||
Q 5.609375 53.953125 15.125 64.078125
|
||||
Q 24.65625 74.21875 41.21875 74.21875
|
||||
Q 47.75 74.21875 53.53125 72.484375
|
||||
Q 59.328125 70.75 64.40625 67.28125
|
||||
z
|
||||
" id="DejaVuSans-67"/>
|
||||
<path d="M 9.421875 75.984375
|
||||
L 18.40625 75.984375
|
||||
L 18.40625 0
|
||||
L 9.421875 0
|
||||
z
|
||||
" id="DejaVuSans-108"/>
|
||||
<path d="M 9.421875 54.6875
|
||||
L 18.40625 54.6875
|
||||
L 18.40625 0
|
||||
L 9.421875 0
|
||||
z
|
||||
M 9.421875 75.984375
|
||||
L 18.40625 75.984375
|
||||
L 18.40625 64.59375
|
||||
L 9.421875 64.59375
|
||||
z
|
||||
" id="DejaVuSans-105"/>
|
||||
<path d="M 56.203125 29.59375
|
||||
L 56.203125 25.203125
|
||||
L 14.890625 25.203125
|
||||
Q 15.484375 15.921875 20.484375 11.0625
|
||||
Q 25.484375 6.203125 34.421875 6.203125
|
||||
Q 39.59375 6.203125 44.453125 7.46875
|
||||
Q 49.3125 8.734375 54.109375 11.28125
|
||||
L 54.109375 2.78125
|
||||
Q 49.265625 0.734375 44.1875 -0.34375
|
||||
Q 39.109375 -1.421875 33.890625 -1.421875
|
||||
Q 20.796875 -1.421875 13.15625 6.1875
|
||||
Q 5.515625 13.8125 5.515625 26.8125
|
||||
Q 5.515625 40.234375 12.765625 48.109375
|
||||
Q 20.015625 56 32.328125 56
|
||||
Q 43.359375 56 49.78125 48.890625
|
||||
Q 56.203125 41.796875 56.203125 29.59375
|
||||
z
|
||||
M 47.21875 32.234375
|
||||
Q 47.125 39.59375 43.09375 43.984375
|
||||
Q 39.0625 48.390625 32.421875 48.390625
|
||||
Q 24.90625 48.390625 20.390625 44.140625
|
||||
Q 15.875 39.890625 15.1875 32.171875
|
||||
z
|
||||
" id="DejaVuSans-101"/>
|
||||
<path d="M 54.890625 33.015625
|
||||
L 54.890625 0
|
||||
L 45.90625 0
|
||||
L 45.90625 32.71875
|
||||
Q 45.90625 40.484375 42.875 44.328125
|
||||
Q 39.84375 48.1875 33.796875 48.1875
|
||||
Q 26.515625 48.1875 22.3125 43.546875
|
||||
Q 18.109375 38.921875 18.109375 30.90625
|
||||
L 18.109375 0
|
||||
L 9.078125 0
|
||||
L 9.078125 54.6875
|
||||
L 18.109375 54.6875
|
||||
L 18.109375 46.1875
|
||||
Q 21.34375 51.125 25.703125 53.5625
|
||||
Q 30.078125 56 35.796875 56
|
||||
Q 45.21875 56 50.046875 50.171875
|
||||
Q 54.890625 44.34375 54.890625 33.015625
|
||||
z
|
||||
" id="DejaVuSans-110"/>
|
||||
<path d="M 18.3125 70.21875
|
||||
L 18.3125 54.6875
|
||||
L 36.8125 54.6875
|
||||
L 36.8125 47.703125
|
||||
L 18.3125 47.703125
|
||||
L 18.3125 18.015625
|
||||
Q 18.3125 11.328125 20.140625 9.421875
|
||||
Q 21.96875 7.515625 27.59375 7.515625
|
||||
L 36.8125 7.515625
|
||||
L 36.8125 0
|
||||
L 27.59375 0
|
||||
Q 17.1875 0 13.234375 3.875
|
||||
Q 9.28125 7.765625 9.28125 18.015625
|
||||
L 9.28125 47.703125
|
||||
L 2.6875 47.703125
|
||||
L 2.6875 54.6875
|
||||
L 9.28125 54.6875
|
||||
L 9.28125 70.21875
|
||||
z
|
||||
" id="DejaVuSans-116"/>
|
||||
<path id="DejaVuSans-32"/>
|
||||
<path d="M 19.671875 34.8125
|
||||
L 19.671875 8.109375
|
||||
L 35.5 8.109375
|
||||
Q 43.453125 8.109375 47.28125 11.40625
|
||||
Q 51.125 14.703125 51.125 21.484375
|
||||
Q 51.125 28.328125 47.28125 31.5625
|
||||
Q 43.453125 34.8125 35.5 34.8125
|
||||
z
|
||||
M 19.671875 64.796875
|
||||
L 19.671875 42.828125
|
||||
L 34.28125 42.828125
|
||||
Q 41.5 42.828125 45.03125 45.53125
|
||||
Q 48.578125 48.25 48.578125 53.8125
|
||||
Q 48.578125 59.328125 45.03125 62.0625
|
||||
Q 41.5 64.796875 34.28125 64.796875
|
||||
z
|
||||
M 9.8125 72.90625
|
||||
L 35.015625 72.90625
|
||||
Q 46.296875 72.90625 52.390625 68.21875
|
||||
Q 58.5 63.53125 58.5 54.890625
|
||||
Q 58.5 48.1875 55.375 44.234375
|
||||
Q 52.25 40.28125 46.1875 39.3125
|
||||
Q 53.46875 37.75 57.5 32.78125
|
||||
Q 61.53125 27.828125 61.53125 20.40625
|
||||
Q 61.53125 10.640625 54.890625 5.3125
|
||||
Q 48.25 0 35.984375 0
|
||||
L 9.8125 0
|
||||
z
|
||||
" id="DejaVuSans-66"/>
|
||||
<path d="M 34.28125 27.484375
|
||||
Q 23.390625 27.484375 19.1875 25
|
||||
Q 14.984375 22.515625 14.984375 16.5
|
||||
Q 14.984375 11.71875 18.140625 8.90625
|
||||
Q 21.296875 6.109375 26.703125 6.109375
|
||||
Q 34.1875 6.109375 38.703125 11.40625
|
||||
Q 43.21875 16.703125 43.21875 25.484375
|
||||
L 43.21875 27.484375
|
||||
z
|
||||
M 52.203125 31.203125
|
||||
L 52.203125 0
|
||||
L 43.21875 0
|
||||
L 43.21875 8.296875
|
||||
Q 40.140625 3.328125 35.546875 0.953125
|
||||
Q 30.953125 -1.421875 24.3125 -1.421875
|
||||
Q 15.921875 -1.421875 10.953125 3.296875
|
||||
Q 6 8.015625 6 15.921875
|
||||
Q 6 25.140625 12.171875 29.828125
|
||||
Q 18.359375 34.515625 30.609375 34.515625
|
||||
L 43.21875 34.515625
|
||||
L 43.21875 35.40625
|
||||
Q 43.21875 41.609375 39.140625 45
|
||||
Q 35.0625 48.390625 27.6875 48.390625
|
||||
Q 23 48.390625 18.546875 47.265625
|
||||
Q 14.109375 46.140625 10.015625 43.890625
|
||||
L 10.015625 52.203125
|
||||
Q 14.9375 54.109375 19.578125 55.046875
|
||||
Q 24.21875 56 28.609375 56
|
||||
Q 40.484375 56 46.34375 49.84375
|
||||
Q 52.203125 43.703125 52.203125 31.203125
|
||||
z
|
||||
" id="DejaVuSans-97"/>
|
||||
<path d="M 48.78125 52.59375
|
||||
L 48.78125 44.1875
|
||||
Q 44.96875 46.296875 41.140625 47.34375
|
||||
Q 37.3125 48.390625 33.40625 48.390625
|
||||
Q 24.65625 48.390625 19.8125 42.84375
|
||||
Q 14.984375 37.3125 14.984375 27.296875
|
||||
Q 14.984375 17.28125 19.8125 11.734375
|
||||
Q 24.65625 6.203125 33.40625 6.203125
|
||||
Q 37.3125 6.203125 41.140625 7.25
|
||||
Q 44.96875 8.296875 48.78125 10.40625
|
||||
L 48.78125 2.09375
|
||||
Q 45.015625 0.34375 40.984375 -0.53125
|
||||
Q 36.96875 -1.421875 32.421875 -1.421875
|
||||
Q 20.0625 -1.421875 12.78125 6.34375
|
||||
Q 5.515625 14.109375 5.515625 27.296875
|
||||
Q 5.515625 40.671875 12.859375 48.328125
|
||||
Q 20.21875 56 33.015625 56
|
||||
Q 37.15625 56 41.109375 55.140625
|
||||
Q 45.0625 54.296875 48.78125 52.59375
|
||||
z
|
||||
" id="DejaVuSans-99"/>
|
||||
<path d="M 54.890625 33.015625
|
||||
L 54.890625 0
|
||||
L 45.90625 0
|
||||
L 45.90625 32.71875
|
||||
Q 45.90625 40.484375 42.875 44.328125
|
||||
Q 39.84375 48.1875 33.796875 48.1875
|
||||
Q 26.515625 48.1875 22.3125 43.546875
|
||||
Q 18.109375 38.921875 18.109375 30.90625
|
||||
L 18.109375 0
|
||||
L 9.078125 0
|
||||
L 9.078125 75.984375
|
||||
L 18.109375 75.984375
|
||||
L 18.109375 46.1875
|
||||
Q 21.34375 51.125 25.703125 53.5625
|
||||
Q 30.078125 56 35.796875 56
|
||||
Q 45.21875 56 50.046875 50.171875
|
||||
Q 54.890625 44.34375 54.890625 33.015625
|
||||
z
|
||||
" id="DejaVuSans-104"/>
|
||||
<path d="M 53.515625 70.515625
|
||||
L 53.515625 60.890625
|
||||
Q 47.90625 63.578125 42.921875 64.890625
|
||||
Q 37.9375 66.21875 33.296875 66.21875
|
||||
Q 25.25 66.21875 20.875 63.09375
|
||||
Q 16.5 59.96875 16.5 54.203125
|
||||
Q 16.5 49.359375 19.40625 46.890625
|
||||
Q 22.3125 44.4375 30.421875 42.921875
|
||||
L 36.375 41.703125
|
||||
Q 47.40625 39.59375 52.65625 34.296875
|
||||
Q 57.90625 29 57.90625 20.125
|
||||
Q 57.90625 9.515625 50.796875 4.046875
|
||||
Q 43.703125 -1.421875 29.984375 -1.421875
|
||||
Q 24.8125 -1.421875 18.96875 -0.25
|
||||
Q 13.140625 0.921875 6.890625 3.21875
|
||||
L 6.890625 13.375
|
||||
Q 12.890625 10.015625 18.65625 8.296875
|
||||
Q 24.421875 6.59375 29.984375 6.59375
|
||||
Q 38.421875 6.59375 43.015625 9.90625
|
||||
Q 47.609375 13.234375 47.609375 19.390625
|
||||
Q 47.609375 24.75 44.3125 27.78125
|
||||
Q 41.015625 30.8125 33.5 32.328125
|
||||
L 27.484375 33.5
|
||||
Q 16.453125 35.6875 11.515625 40.375
|
||||
Q 6.59375 45.0625 6.59375 53.421875
|
||||
Q 6.59375 63.09375 13.40625 68.65625
|
||||
Q 20.21875 74.21875 32.171875 74.21875
|
||||
Q 37.3125 74.21875 42.625 73.28125
|
||||
Q 47.953125 72.359375 53.515625 70.515625
|
||||
z
|
||||
" id="DejaVuSans-83"/>
|
||||
<path d="M 5.515625 54.6875
|
||||
L 48.1875 54.6875
|
||||
L 48.1875 46.484375
|
||||
L 14.40625 7.171875
|
||||
L 48.1875 7.171875
|
||||
L 48.1875 0
|
||||
L 4.296875 0
|
||||
L 4.296875 8.203125
|
||||
L 38.09375 47.515625
|
||||
L 5.515625 47.515625
|
||||
z
|
||||
" id="DejaVuSans-122"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-67"/>
|
||||
<use x="69.824219" xlink:href="#DejaVuSans-108"/>
|
||||
<use x="97.607422" xlink:href="#DejaVuSans-105"/>
|
||||
<use x="125.390625" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="186.914062" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="250.292969" xlink:href="#DejaVuSans-116"/>
|
||||
<use x="289.501953" xlink:href="#DejaVuSans-32"/>
|
||||
<use x="321.289062" xlink:href="#DejaVuSans-66"/>
|
||||
<use x="389.892578" xlink:href="#DejaVuSans-97"/>
|
||||
<use x="451.171875" xlink:href="#DejaVuSans-116"/>
|
||||
<use x="490.380859" xlink:href="#DejaVuSans-99"/>
|
||||
<use x="545.361328" xlink:href="#DejaVuSans-104"/>
|
||||
<use x="608.740234" xlink:href="#DejaVuSans-32"/>
|
||||
<use x="640.527344" xlink:href="#DejaVuSans-83"/>
|
||||
<use x="704.003906" xlink:href="#DejaVuSans-105"/>
|
||||
<use x="731.787109" xlink:href="#DejaVuSans-122"/>
|
||||
<use x="784.277344" xlink:href="#DejaVuSans-101"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="matplotlib.axis_2">
|
||||
<g id="ytick_1">
|
||||
<g id="line2d_1">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 53.31 288.430125
|
||||
L 410.43 288.430125
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_10">
|
||||
<!-- 0 -->
|
||||
<g style="fill:#262626;" transform="translate(36.81125 292.609266)scale(0.11 -0.11)">
|
||||
<defs>
|
||||
<path d="M 31.78125 66.40625
|
||||
Q 24.171875 66.40625 20.328125 58.90625
|
||||
Q 16.5 51.421875 16.5 36.375
|
||||
Q 16.5 21.390625 20.328125 13.890625
|
||||
Q 24.171875 6.390625 31.78125 6.390625
|
||||
Q 39.453125 6.390625 43.28125 13.890625
|
||||
Q 47.125 21.390625 47.125 36.375
|
||||
Q 47.125 51.421875 43.28125 58.90625
|
||||
Q 39.453125 66.40625 31.78125 66.40625
|
||||
z
|
||||
M 31.78125 74.21875
|
||||
Q 44.046875 74.21875 50.515625 64.515625
|
||||
Q 56.984375 54.828125 56.984375 36.375
|
||||
Q 56.984375 17.96875 50.515625 8.265625
|
||||
Q 44.046875 -1.421875 31.78125 -1.421875
|
||||
Q 19.53125 -1.421875 13.0625 8.265625
|
||||
Q 6.59375 17.96875 6.59375 36.375
|
||||
Q 6.59375 54.828125 13.0625 64.515625
|
||||
Q 19.53125 74.21875 31.78125 74.21875
|
||||
z
|
||||
" id="DejaVuSans-48"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_2">
|
||||
<g id="line2d_2">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 53.31 242.325216
|
||||
L 410.43 242.325216
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_11">
|
||||
<!-- 20 -->
|
||||
<g style="fill:#262626;" transform="translate(29.8125 246.504357)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-50"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_3">
|
||||
<g id="line2d_3">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 53.31 196.220308
|
||||
L 410.43 196.220308
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_12">
|
||||
<!-- 40 -->
|
||||
<g style="fill:#262626;" transform="translate(29.8125 200.399448)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-52"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_4">
|
||||
<g id="line2d_4">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 53.31 150.115399
|
||||
L 410.43 150.115399
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_13">
|
||||
<!-- 60 -->
|
||||
<g style="fill:#262626;" transform="translate(29.8125 154.29454)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-54"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_5">
|
||||
<g id="line2d_5">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 53.31 104.01049
|
||||
L 410.43 104.01049
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_14">
|
||||
<!-- 80 -->
|
||||
<g style="fill:#262626;" transform="translate(29.8125 108.189631)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-56"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="ytick_6">
|
||||
<g id="line2d_6">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 53.31 57.905582
|
||||
L 410.43 57.905582
|
||||
" style="fill:none;stroke:#c0c0c0;stroke-linecap:round;stroke-width:0.5;"/>
|
||||
</g>
|
||||
<g id="text_15">
|
||||
<!-- 100 -->
|
||||
<g style="fill:#262626;" transform="translate(22.81375 62.084722)scale(0.11 -0.11)">
|
||||
<use xlink:href="#DejaVuSans-49"/>
|
||||
<use x="63.623047" xlink:href="#DejaVuSans-48"/>
|
||||
<use x="127.246094" xlink:href="#DejaVuSans-48"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="text_16">
|
||||
<!-- Avg Latency -->
|
||||
<g style="fill:#262626;" transform="translate(16.318125 192.110062)rotate(-90)scale(0.12 -0.12)">
|
||||
<defs>
|
||||
<path d="M 34.1875 63.1875
|
||||
L 20.796875 26.90625
|
||||
L 47.609375 26.90625
|
||||
z
|
||||
M 28.609375 72.90625
|
||||
L 39.796875 72.90625
|
||||
L 67.578125 0
|
||||
L 57.328125 0
|
||||
L 50.6875 18.703125
|
||||
L 17.828125 18.703125
|
||||
L 11.1875 0
|
||||
L 0.78125 0
|
||||
z
|
||||
" id="DejaVuSans-65"/>
|
||||
<path d="M 2.984375 54.6875
|
||||
L 12.5 54.6875
|
||||
L 29.59375 8.796875
|
||||
L 46.6875 54.6875
|
||||
L 56.203125 54.6875
|
||||
L 35.6875 0
|
||||
L 23.484375 0
|
||||
z
|
||||
" id="DejaVuSans-118"/>
|
||||
<path d="M 45.40625 27.984375
|
||||
Q 45.40625 37.75 41.375 43.109375
|
||||
Q 37.359375 48.484375 30.078125 48.484375
|
||||
Q 22.859375 48.484375 18.828125 43.109375
|
||||
Q 14.796875 37.75 14.796875 27.984375
|
||||
Q 14.796875 18.265625 18.828125 12.890625
|
||||
Q 22.859375 7.515625 30.078125 7.515625
|
||||
Q 37.359375 7.515625 41.375 12.890625
|
||||
Q 45.40625 18.265625 45.40625 27.984375
|
||||
z
|
||||
M 54.390625 6.78125
|
||||
Q 54.390625 -7.171875 48.1875 -13.984375
|
||||
Q 42 -20.796875 29.203125 -20.796875
|
||||
Q 24.46875 -20.796875 20.265625 -20.09375
|
||||
Q 16.0625 -19.390625 12.109375 -17.921875
|
||||
L 12.109375 -9.1875
|
||||
Q 16.0625 -11.328125 19.921875 -12.34375
|
||||
Q 23.78125 -13.375 27.78125 -13.375
|
||||
Q 36.625 -13.375 41.015625 -8.765625
|
||||
Q 45.40625 -4.15625 45.40625 5.171875
|
||||
L 45.40625 9.625
|
||||
Q 42.625 4.78125 38.28125 2.390625
|
||||
Q 33.9375 0 27.875 0
|
||||
Q 17.828125 0 11.671875 7.65625
|
||||
Q 5.515625 15.328125 5.515625 27.984375
|
||||
Q 5.515625 40.671875 11.671875 48.328125
|
||||
Q 17.828125 56 27.875 56
|
||||
Q 33.9375 56 38.28125 53.609375
|
||||
Q 42.625 51.21875 45.40625 46.390625
|
||||
L 45.40625 54.6875
|
||||
L 54.390625 54.6875
|
||||
z
|
||||
" id="DejaVuSans-103"/>
|
||||
<path d="M 9.8125 72.90625
|
||||
L 19.671875 72.90625
|
||||
L 19.671875 8.296875
|
||||
L 55.171875 8.296875
|
||||
L 55.171875 0
|
||||
L 9.8125 0
|
||||
z
|
||||
" id="DejaVuSans-76"/>
|
||||
<path d="M 32.171875 -5.078125
|
||||
Q 28.375 -14.84375 24.75 -17.8125
|
||||
Q 21.140625 -20.796875 15.09375 -20.796875
|
||||
L 7.90625 -20.796875
|
||||
L 7.90625 -13.28125
|
||||
L 13.1875 -13.28125
|
||||
Q 16.890625 -13.28125 18.9375 -11.515625
|
||||
Q 21 -9.765625 23.484375 -3.21875
|
||||
L 25.09375 0.875
|
||||
L 2.984375 54.6875
|
||||
L 12.5 54.6875
|
||||
L 29.59375 11.921875
|
||||
L 46.6875 54.6875
|
||||
L 56.203125 54.6875
|
||||
z
|
||||
" id="DejaVuSans-121"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-65"/>
|
||||
<use x="62.533203" xlink:href="#DejaVuSans-118"/>
|
||||
<use x="121.712891" xlink:href="#DejaVuSans-103"/>
|
||||
<use x="185.189453" xlink:href="#DejaVuSans-32"/>
|
||||
<use x="216.976562" xlink:href="#DejaVuSans-76"/>
|
||||
<use x="272.689453" xlink:href="#DejaVuSans-97"/>
|
||||
<use x="333.96875" xlink:href="#DejaVuSans-116"/>
|
||||
<use x="373.177734" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="434.701172" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="498.080078" xlink:href="#DejaVuSans-99"/>
|
||||
<use x="553.060547" xlink:href="#DejaVuSans-121"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<g id="patch_3">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 57.774 288.430125
|
||||
L 93.486 288.430125
|
||||
L 93.486 282.496423
|
||||
L 57.774 282.496423
|
||||
z
|
||||
" style="fill:#5875a4;stroke:#ffffff;stroke-linejoin:miter;"/>
|
||||
</g>
|
||||
<g id="patch_4">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 102.414 288.430125
|
||||
L 138.126 288.430125
|
||||
L 138.126 281.203181
|
||||
L 102.414 281.203181
|
||||
z
|
||||
" style="fill:#5875a4;stroke:#ffffff;stroke-linejoin:miter;"/>
|
||||
</g>
|
||||
<g id="patch_5">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 147.054 288.430125
|
||||
L 182.766 288.430125
|
||||
L 182.766 279.047776
|
||||
L 147.054 279.047776
|
||||
z
|
||||
" style="fill:#5875a4;stroke:#ffffff;stroke-linejoin:miter;"/>
|
||||
</g>
|
||||
<g id="patch_6">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 191.694 288.430125
|
||||
L 227.406 288.430125
|
||||
L 227.406 272.83514
|
||||
L 191.694 272.83514
|
||||
z
|
||||
" style="fill:#5875a4;stroke:#ffffff;stroke-linejoin:miter;"/>
|
||||
</g>
|
||||
<g id="patch_7">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 236.334 288.430125
|
||||
L 272.046 288.430125
|
||||
L 272.046 262.931805
|
||||
L 236.334 262.931805
|
||||
z
|
||||
" style="fill:#5875a4;stroke:#ffffff;stroke-linejoin:miter;"/>
|
||||
</g>
|
||||
<g id="patch_8">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 280.974 288.430125
|
||||
L 316.686 288.430125
|
||||
L 316.686 234.019417
|
||||
L 280.974 234.019417
|
||||
z
|
||||
" style="fill:#5875a4;stroke:#ffffff;stroke-linejoin:miter;"/>
|
||||
</g>
|
||||
<g id="patch_9">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 325.614 288.430125
|
||||
L 361.326 288.430125
|
||||
L 361.326 188.380168
|
||||
L 325.614 188.380168
|
||||
z
|
||||
" style="fill:#5875a4;stroke:#ffffff;stroke-linejoin:miter;"/>
|
||||
</g>
|
||||
<g id="patch_10">
|
||||
<path clip-path="url(#p9ba82f1e29)" d="M 370.254 288.430125
|
||||
L 405.966 288.430125
|
||||
L 405.966 66.670125
|
||||
L 370.254 66.670125
|
||||
z
|
||||
" style="fill:#5875a4;stroke:#ffffff;stroke-linejoin:miter;"/>
|
||||
</g>
|
||||
<g id="patch_11">
|
||||
<path d="M 53.31 288.430125
|
||||
L 53.31 22.318125
|
||||
" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:2;"/>
|
||||
</g>
|
||||
<g id="patch_12">
|
||||
<path d="M 410.43 288.430125
|
||||
L 410.43 22.318125
|
||||
" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:2;"/>
|
||||
</g>
|
||||
<g id="patch_13">
|
||||
<path d="M 53.31 288.430125
|
||||
L 410.43 288.430125
|
||||
" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:2;"/>
|
||||
</g>
|
||||
<g id="patch_14">
|
||||
<path d="M 53.31 22.318125
|
||||
L 410.43 22.318125
|
||||
" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:2;"/>
|
||||
</g>
|
||||
<g id="text_17">
|
||||
<!-- Performance offline -->
|
||||
<g style="fill:#262626;" transform="translate(173.220937 16.318125)scale(0.12 -0.12)">
|
||||
<defs>
|
||||
<path d="M 19.671875 64.796875
|
||||
L 19.671875 37.40625
|
||||
L 32.078125 37.40625
|
||||
Q 38.96875 37.40625 42.71875 40.96875
|
||||
Q 46.484375 44.53125 46.484375 51.125
|
||||
Q 46.484375 57.671875 42.71875 61.234375
|
||||
Q 38.96875 64.796875 32.078125 64.796875
|
||||
z
|
||||
M 9.8125 72.90625
|
||||
L 32.078125 72.90625
|
||||
Q 44.34375 72.90625 50.609375 67.359375
|
||||
Q 56.890625 61.8125 56.890625 51.125
|
||||
Q 56.890625 40.328125 50.609375 34.8125
|
||||
Q 44.34375 29.296875 32.078125 29.296875
|
||||
L 19.671875 29.296875
|
||||
L 19.671875 0
|
||||
L 9.8125 0
|
||||
z
|
||||
" id="DejaVuSans-80"/>
|
||||
<path d="M 41.109375 46.296875
|
||||
Q 39.59375 47.171875 37.8125 47.578125
|
||||
Q 36.03125 48 33.890625 48
|
||||
Q 26.265625 48 22.1875 43.046875
|
||||
Q 18.109375 38.09375 18.109375 28.8125
|
||||
L 18.109375 0
|
||||
L 9.078125 0
|
||||
L 9.078125 54.6875
|
||||
L 18.109375 54.6875
|
||||
L 18.109375 46.1875
|
||||
Q 20.953125 51.171875 25.484375 53.578125
|
||||
Q 30.03125 56 36.53125 56
|
||||
Q 37.453125 56 38.578125 55.875
|
||||
Q 39.703125 55.765625 41.0625 55.515625
|
||||
z
|
||||
" id="DejaVuSans-114"/>
|
||||
<path d="M 37.109375 75.984375
|
||||
L 37.109375 68.5
|
||||
L 28.515625 68.5
|
||||
Q 23.6875 68.5 21.796875 66.546875
|
||||
Q 19.921875 64.59375 19.921875 59.515625
|
||||
L 19.921875 54.6875
|
||||
L 34.71875 54.6875
|
||||
L 34.71875 47.703125
|
||||
L 19.921875 47.703125
|
||||
L 19.921875 0
|
||||
L 10.890625 0
|
||||
L 10.890625 47.703125
|
||||
L 2.296875 47.703125
|
||||
L 2.296875 54.6875
|
||||
L 10.890625 54.6875
|
||||
L 10.890625 58.5
|
||||
Q 10.890625 67.625 15.140625 71.796875
|
||||
Q 19.390625 75.984375 28.609375 75.984375
|
||||
z
|
||||
" id="DejaVuSans-102"/>
|
||||
<path d="M 30.609375 48.390625
|
||||
Q 23.390625 48.390625 19.1875 42.75
|
||||
Q 14.984375 37.109375 14.984375 27.296875
|
||||
Q 14.984375 17.484375 19.15625 11.84375
|
||||
Q 23.34375 6.203125 30.609375 6.203125
|
||||
Q 37.796875 6.203125 41.984375 11.859375
|
||||
Q 46.1875 17.53125 46.1875 27.296875
|
||||
Q 46.1875 37.015625 41.984375 42.703125
|
||||
Q 37.796875 48.390625 30.609375 48.390625
|
||||
z
|
||||
M 30.609375 56
|
||||
Q 42.328125 56 49.015625 48.375
|
||||
Q 55.71875 40.765625 55.71875 27.296875
|
||||
Q 55.71875 13.875 49.015625 6.21875
|
||||
Q 42.328125 -1.421875 30.609375 -1.421875
|
||||
Q 18.84375 -1.421875 12.171875 6.21875
|
||||
Q 5.515625 13.875 5.515625 27.296875
|
||||
Q 5.515625 40.765625 12.171875 48.375
|
||||
Q 18.84375 56 30.609375 56
|
||||
z
|
||||
" id="DejaVuSans-111"/>
|
||||
<path d="M 52 44.1875
|
||||
Q 55.375 50.25 60.0625 53.125
|
||||
Q 64.75 56 71.09375 56
|
||||
Q 79.640625 56 84.28125 50.015625
|
||||
Q 88.921875 44.046875 88.921875 33.015625
|
||||
L 88.921875 0
|
||||
L 79.890625 0
|
||||
L 79.890625 32.71875
|
||||
Q 79.890625 40.578125 77.09375 44.375
|
||||
Q 74.3125 48.1875 68.609375 48.1875
|
||||
Q 61.625 48.1875 57.5625 43.546875
|
||||
Q 53.515625 38.921875 53.515625 30.90625
|
||||
L 53.515625 0
|
||||
L 44.484375 0
|
||||
L 44.484375 32.71875
|
||||
Q 44.484375 40.625 41.703125 44.40625
|
||||
Q 38.921875 48.1875 33.109375 48.1875
|
||||
Q 26.21875 48.1875 22.15625 43.53125
|
||||
Q 18.109375 38.875 18.109375 30.90625
|
||||
L 18.109375 0
|
||||
L 9.078125 0
|
||||
L 9.078125 54.6875
|
||||
L 18.109375 54.6875
|
||||
L 18.109375 46.1875
|
||||
Q 21.1875 51.21875 25.484375 53.609375
|
||||
Q 29.78125 56 35.6875 56
|
||||
Q 41.65625 56 45.828125 52.96875
|
||||
Q 50 49.953125 52 44.1875
|
||||
z
|
||||
" id="DejaVuSans-109"/>
|
||||
</defs>
|
||||
<use xlink:href="#DejaVuSans-80"/>
|
||||
<use x="56.677734" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="118.201172" xlink:href="#DejaVuSans-114"/>
|
||||
<use x="159.314453" xlink:href="#DejaVuSans-102"/>
|
||||
<use x="194.519531" xlink:href="#DejaVuSans-111"/>
|
||||
<use x="255.701172" xlink:href="#DejaVuSans-114"/>
|
||||
<use x="295.064453" xlink:href="#DejaVuSans-109"/>
|
||||
<use x="392.476562" xlink:href="#DejaVuSans-97"/>
|
||||
<use x="453.755859" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="517.134766" xlink:href="#DejaVuSans-99"/>
|
||||
<use x="572.115234" xlink:href="#DejaVuSans-101"/>
|
||||
<use x="633.638672" xlink:href="#DejaVuSans-32"/>
|
||||
<use x="665.425781" xlink:href="#DejaVuSans-111"/>
|
||||
<use x="726.607422" xlink:href="#DejaVuSans-102"/>
|
||||
<use x="761.8125" xlink:href="#DejaVuSans-102"/>
|
||||
<use x="797.017578" xlink:href="#DejaVuSans-108"/>
|
||||
<use x="824.800781" xlink:href="#DejaVuSans-105"/>
|
||||
<use x="852.583984" xlink:href="#DejaVuSans-110"/>
|
||||
<use x="915.962891" xlink:href="#DejaVuSans-101"/>
|
||||
</g>
|
||||
</g>
|
||||
<g id="legend_1"/>
|
||||
</g>
|
||||
</g>
|
||||
<defs>
|
||||
<clipPath id="p9ba82f1e29">
|
||||
<rect height="266.112" width="357.12" x="53.31" y="22.318125"/>
|
||||
</clipPath>
|
||||
</defs>
|
||||
</svg>
|
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 92 KiB |
After Width: | Height: | Size: 94 KiB |
After Width: | Height: | Size: 94 KiB |
After Width: | Height: | Size: 93 KiB |
127
TensorFlow/Classification/ConvNets/triton/process_dataset.py
Normal file
|
@ -0,0 +1,127 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
from typing import Tuple, Dict, List
|
||||
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
|
||||
# Root directory holding the original archives; None when $DATASETS_DIR is unset.
DATASETS_DIR = os.environ.get("DATASETS_DIR")

# Name of the output sub-directory created inside the dataset directory.
IMAGENET_DIRNAME = "imagenet"

# Archive file names expected inside the dataset directory.
IMAGE_ARCHIVE_FILENAME = "ILSVRC2012_img_val.tar"
DEVKIT_ARCHIVE_FILENAME = "ILSVRC2012_devkit_t12.tar.gz"

# Members read from the devkit archive.
LABELS_REL_PATH = "ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt"
META_REL_PATH = "ILSVRC2012_devkit_t12/data/meta.mat"

# Default size of the central crop, expressed as (width, height).
TARGET_SIZE = (224, 224)
# Images are resized, keeping their aspect ratio, so that the shorter side has this length.
_RESIZE_MIN = 256
|
||||
|
||||
|
||||
def parse_meta_mat(metafile) -> Dict[int, str]:
    """Build the mapping from synset index to WNID for leaf synsets.

    Args:
        metafile: Path or file-like object accepted by ``scipy.io.loadmat``
            that contains a ``synsets`` table.

    Returns:
        Dictionary mapping the first field of every leaf row (the synset
        index) to its second field (the WNID). A row is a leaf when its fifth
        field (the number of children) equals zero. An empty dictionary is
        returned when the table has no leaf rows.
    """
    # scipy is imported lazily so it is only required when metadata is parsed.
    import scipy.io

    synsets = scipy.io.loadmat(metafile, squeeze_me=True)["synsets"]

    # Fields are accessed by position: 0 -> index, 1 -> WNID, 4 -> number of children.
    # NOTE(review): positions presumably follow the ILSVRC2012 devkit layout - confirm.
    idx_to_wnid = {}
    for synset in synsets:
        if synset[4] == 0:
            idx_to_wnid[synset[0]] = synset[1]
    return idx_to_wnid
|
||||
|
||||
|
||||
def _process_image(image_file, target_size):
    """Resize an image so its shorter side is ``_RESIZE_MIN`` and crop its centre.

    Args:
        image_file: Path or file-like object accepted by ``Image.open``.
        target_size: ``(width, height)`` of the central crop.

    Returns:
        The cropped image object produced by ``crop``.
    """
    image = Image.open(image_file)
    width, height = image.size

    # Scale so that the shorter side reaches _RESIZE_MIN while keeping the aspect ratio.
    scale_factor = max(_RESIZE_MIN / width, _RESIZE_MIN / height)
    # ``side * (_RESIZE_MIN / side)`` may evaluate to slightly less than _RESIZE_MIN in
    # floating point (e.g. for side == 49), which int() would truncate to
    # _RESIZE_MIN - 1. Both sides are mathematically >= _RESIZE_MIN, so clamp them.
    resize_to = (
        max(_RESIZE_MIN, int(width * scale_factor)),
        max(_RESIZE_MIN, int(height * scale_factor)),
    )
    resized_image = image.resize(resize_to)

    # Central crop of the resized image to target_size.
    left = (resize_to[0] - target_size[0]) // 2
    upper = (resize_to[1] - target_size[1]) // 2
    crop_box = (left, upper, left + target_size[0], upper + target_size[1])
    return resized_image.crop(crop_box)
|
||||
|
||||
|
||||
def main():
    """Preprocess the ImageNet validation archives stored in the dataset directory.

    Labels and synset metadata are read from the devkit archive and every
    validation label is remapped to the position of its WNID in the sorted list
    of WNIDs. Each member of the image archive is then resized, centre-cropped
    and saved as ``<dataset-dir>/imagenet/<class index>/<member name>``.

    Raises:
        ValueError: If neither ``--dataset-dir`` nor ``$DATASETS_DIR`` is provided.
        RuntimeError: If an expected archive is missing or the number of labels
            differs from the number of members of the image archive.
        SystemExit: Raised by argparse when ``--target-size`` is malformed.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Resize and center-crop ImageNet validation images and group them by class index."
    )
    parser.add_argument(
        "--dataset-dir",
        help="Path to dataset directory where imagenet archives are stored and processed files will be saved.",
        required=False,
        default=DATASETS_DIR,
    )
    parser.add_argument(
        "--target-size",
        help="Size of target image. Format it as <width>,<height>.",
        required=False,
        default=",".join(map(str, TARGET_SIZE)),
    )
    args = parser.parse_args()

    if args.dataset_dir is None:
        raise ValueError(
            "Please set $DATASETS_DIR env variable to point dataset dir with original dataset archives "
            "and where processed files should be stored. Alternatively provide --dataset-dir CLI argument"
        )

    datasets_dir = Path(args.dataset_dir)

    # Reject malformed sizes up front instead of failing on the first image.
    try:
        target_size = tuple(map(int, args.target_size.split(",")))
    except ValueError:
        parser.error(f"--target-size must be formatted as <width>,<height>, got {args.target_size!r}")
    if len(target_size) != 2:
        parser.error(f"--target-size must be formatted as <width>,<height>, got {args.target_size!r}")

    image_archive_path = datasets_dir / IMAGE_ARCHIVE_FILENAME
    if not image_archive_path.exists():
        raise RuntimeError(
            f"There should be {IMAGE_ARCHIVE_FILENAME} file in {datasets_dir}. "
            "You need to download the dataset from http://www.image-net.org/download."
        )

    devkit_archive_path = datasets_dir / DEVKIT_ARCHIVE_FILENAME
    if not devkit_archive_path.exists():
        raise RuntimeError(
            f"There should be {DEVKIT_ARCHIVE_FILENAME} file in {datasets_dir}. "
            "You need to download the dataset from http://www.image-net.org/download."
        )

    with tarfile.open(devkit_archive_path, mode="r") as devkit_archive_file:
        labels_file = devkit_archive_file.extractfile(LABELS_REL_PATH)
        labels = list(map(int, labels_file.readlines()))

        # map validation labels (idxes from LABELS_REL_PATH) into WNID compatible with training set
        meta_file = devkit_archive_file.extractfile(META_REL_PATH)
        idx_to_wnid = parse_meta_mat(meta_file)
        labels_wnid = [idx_to_wnid[idx] for idx in labels]

        # remap WNID into index in sorted list of all WNIDs - this is how network outputs class
        available_wnids = sorted(set(labels_wnid))
        wnid_to_newidx = {wnid: new_cls for new_cls, wnid in enumerate(available_wnids)}
        labels = [wnid_to_newidx[wnid] for wnid in labels_wnid]

    output_dir = datasets_dir / IMAGENET_DIRNAME
    with tarfile.open(image_archive_path, mode="r") as image_archive_file:
        # Labels are paired with archive members in sorted-name order.
        image_rel_paths = sorted(image_archive_file.getnames())
        # zip() would silently drop the surplus entries and mislabel nothing visibly,
        # so fail loudly when the two sources disagree.
        if len(labels) != len(image_rel_paths):
            raise RuntimeError(
                f"Number of labels ({len(labels)}) does not match number of entries "
                f"in {IMAGE_ARCHIVE_FILENAME} ({len(image_rel_paths)})."
            )
        for cls, image_rel_path in tqdm(zip(labels, image_rel_paths), total=len(image_rel_paths)):
            output_path = output_dir / str(cls) / image_rel_path
            original_image_file = image_archive_file.extractfile(image_rel_path)
            processed_image = _process_image(original_image_file, target_size)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            processed_image.save(output_path.as_posix())


if __name__ == "__main__":
    main()
|
12
TensorFlow/Classification/ConvNets/triton/requirements.txt
Normal file
|
@ -0,0 +1,12 @@
|
|||
networkx==2.5
|
||||
numpy<1.20.0,>=1.16.0  # numpy 1.20+ requires py37+
|
||||
onnx==1.8.0
|
||||
onnxruntime==1.6.0
|
||||
pycuda>=2019.1.2
|
||||
PyYAML>=5.2
|
||||
tqdm>=4.44.1
|
||||
tf2onnx==1.8.3
|
||||
tabulate>=0.8.7
|
||||
natsort>=7.0.0
|
||||
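# Pin tags or commit hashes instead of branch names: a Docker layer cache hit can otherwise skip fetching the most recent changes on a branch.
# NOTE(review): the URL below embeds an access token in plain text; confirm it has been revoked and supply credentials through a build secret instead.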
|
||||
service_maker @ git+https://access-token:usVyg8b11sn9gCacsVCf@gitlab-master.nvidia.com/dl/JoC/service_maker.git@1b83b96#egg=service_maker
|
86
TensorFlow/Classification/ConvNets/triton/rn50_model.py
Normal file
|
@ -0,0 +1,86 @@
|
|||
import logging
|
||||
|
||||
import tensorflow as tf
|
||||
from utils import data_utils
|
||||
|
||||
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)

# Number of output classes handed to the Runner.
# NOTE(review): presumably 1000 ImageNet classes plus a background class - confirm.
NCLASSES = 1001
# Spatial size and channel count of the model input.
WIDTH = HEIGHT = 224
NCHANNELS = 3
# Tensor layouts used for the input data and for computation.
INPUT_FORMAT = COMPUTE_FORMAT = "NHWC"
|
||||
|
||||
|
||||
def get_model(
    *,
    model_dir: str,
    arch: str = "resnet50",
    precision: str = "fp32",
    use_xla: bool = True,
    use_tf_amp: bool = False,
    use_dali: bool = False,
    gpu_memory_fraction=0.7,
):
    """Create an inference estimator for the requested architecture.

    Args:
        model_dir: Model directory forwarded to ``Runner``.
        arch: Architecture name forwarded to ``Runner``.
        precision: ``"fp16"`` or ``"fp32"`` (case-insensitive); selects the dtype.
        use_xla: Forwarded to ``Runner`` and to the estimator factory.
        use_tf_amp: Forwarded to ``Runner``.
        use_dali: Forwarded to ``Runner`` and to the estimator factory.
        gpu_memory_fraction: Forwarded to ``Runner`` and to the estimator factory.

    Returns:
        The estimator returned by ``Runner._get_estimator`` in ``"inference"`` mode.

    Raises:
        ValueError: If ``precision`` is neither ``fp16`` nor ``fp32``.
    """
    # Imported lazily so that importing this module does not require horovod
    # or the project's runtime package.
    import horovod.tensorflow as hvd
    from runtime import Runner

    # NOTE(review): presumably Runner expects Horovod to be initialised - confirm.
    hvd.init()

    try:
        dtype = {"fp16": tf.float16, "fp32": tf.float32}[precision.lower()]
    except KeyError:
        # The KeyError carries no extra information, so it is not chained.
        raise ValueError(f"Unknown precision {precision}. Allowed values: fp16|fp32") from None

    LOGGER.info(
        f"Creating model arch={arch} precision={precision} xla={use_xla} "
        f"tf_amp={use_tf_amp}, dali={use_dali}, gpu_memory_frac={gpu_memory_fraction}"
    )

    runner = Runner(
        n_classes=NCLASSES,
        architecture=arch,
        input_format=INPUT_FORMAT,
        compute_format=COMPUTE_FORMAT,
        dtype=dtype,
        n_channels=NCHANNELS,
        height=HEIGHT,
        width=WIDTH,
        use_xla=use_xla,
        use_tf_amp=use_tf_amp,
        use_dali=use_dali,
        gpu_memory_fraction=gpu_memory_fraction,
        gpu_id=0,
        model_dir=model_dir,
    )

    # removed params not used in inference
    estimator_params = {"use_final_conv": False}  # TODO: Why not moved to model constructor?
    estimator = runner._get_estimator(
        mode="inference",
        run_params=estimator_params,
        use_xla=use_xla,
        use_dali=use_dali,
        gpu_memory_fraction=gpu_memory_fraction,
    )
    return estimator
|
||||
|
||||
|
||||
def get_serving_input_receiver_fn(
    batch_size: int = None,
    input_dtype: str = "fp32",
    width: int = WIDTH,
    height: int = HEIGHT,
    nchannels: int = NCHANNELS,
):
    """Create the serving input receiver function for the model.

    Args:
        batch_size: Forwarded to ``data_utils.get_serving_input_receiver_fn``.
        input_dtype: Precision name; any non-empty value containing ``"16"``
            selects ``tf.float16``, everything else selects ``tf.float32``.
        width: Input width.
        height: Input height.
        nchannels: Number of input channels.

    Returns:
        Whatever ``data_utils.get_serving_input_receiver_fn`` returns for the
        given geometry, using ``INPUT_FORMAT`` as the data format.
    """
    if input_dtype and "16" in input_dtype:
        tf_dtype = tf.float16
    else:
        tf_dtype = tf.float32

    return data_utils.get_serving_input_receiver_fn(
        batch_size=batch_size,
        height=height,
        width=width,
        num_channels=nchannels,
        data_format=INPUT_FORMAT,
        dtype=tf_dtype,
    )
|
220
TensorFlow/Classification/ConvNets/triton/run_benchmark.py
Executable file
|
@ -0,0 +1,220 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# method from PEP-366 to support relative import in executed modules
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
# PEP 366: when this file is executed directly as a script, name the package
# after the containing directory so the explicit relative imports below resolve.
if __package__ is None and __name__ == "__main__":
    __package__ = Path(__file__).parent.name
|
||||
|
||||
from .benchmark.benchmark import Benchmark
|
||||
from .benchmark.checkpoints import HttpCheckpoint
|
||||
from .benchmark.core import LOGGER
|
||||
from .benchmark.executor import DockerExecutor
|
||||
from .deployment_toolkit.core import Accelerator, Format, Precision
|
||||
|
||||
# Values accepted by the --model-format, --precision and --backend-accelerator
# command-line options, taken from the deployment toolkit enumerations.
AVAILABLE_MODEL_FORMATS = [model_format.value for model_format in Format]
AVAILABLE_MODEL_PRECISIONS = [model_precision.value for model_precision in Precision]
AVAILABLE_MODEL_ACCELERATORS = [accelerator.value for accelerator in Accelerator]
|
||||
|
||||
def run_benchmark(
    devices: List[str],
    model_name: str,
    model_version: int,
    model_format: str,
    container_version: str,
    checkpoint: str,
    max_batch_size: int,
    precision: str,
    number_of_model_instances: int,
    preferred_batch_sizes: List[int],
    max_queue_delay_us: int,
    backend_accelerator: str,
    verbose: bool,
    **kwargs
):
    """Describe the benchmark pipeline and run it with a ``DockerExecutor``.

    Four stages are registered on a ``Benchmark`` in this order: model
    conversion, model deployment, offline Triton performance tests and online
    Triton performance tests. One configuration is then added and the
    benchmark is handed to the executor.

    Args:
        devices: Device identifiers passed to ``Benchmark``.
        model_name: Model name passed to ``Benchmark``.
        model_version: Model version passed to ``Benchmark``.
        model_format: Stored in the configuration as ``format``.
        container_version: Container version passed to ``Benchmark``.
        checkpoint: Checkpoint URL, wrapped in ``HttpCheckpoint``.
        max_batch_size: Stored in the configuration.
        precision: Stored in the configuration.
        number_of_model_instances: Stored as ``triton_gpu_engine_count``.
        preferred_batch_sizes: Stored as ``triton_preferred_batch_sizes``.
        max_queue_delay_us: Stored as ``triton_max_queue_delay_us``.
        backend_accelerator: Stored in the configuration as ``accelerator``.
        verbose: Verbosity flag passed to ``Benchmark``.
        **kwargs: Extra entries forwarded unchanged to ``benchmark.configuration``.
    """
    benchmark = Benchmark(
        devices=devices,
        model_name=model_name,
        model_version=model_version,
        framework="TensorFlow1",
        container_version=container_version,
        checkpoint=HttpCheckpoint(checkpoint),
        verbose=verbose,
    )

    # Stage 1: convert the estimator defined in triton/rn50_model.py.
    conversion_cmds = (
        r"""
        python3 triton/convert_model.py \
            --input-path triton/rn50_model.py \
            --input-type tf-estimator \
            --output-path ${SHARED_DIR}/model \
            --output-type ${FORMAT} \
            --onnx-opset 12 \
            --onnx-optimized 1 \
            --max-batch-size ${MAX_BATCH_SIZE} \
            --max-workspace-size 4294967296 \
            --ignore-unknown-parameters \
            \
            --model-dir ${CHECKPOINT_DIR} \
            --precision ${PRECISION} \
            --dataloader triton/dataloader.py \
            --data-dir ${DATASETS_DIR}/imagenet
        """,
    )
    benchmark.model_conversion(cmds=conversion_cmds)

    # Stage 2: deploy the converted model.
    # NOTE(review): --model-version is hard-coded to 1 here although a
    # model_version argument is given to Benchmark above - confirm this is intended.
    deployment_cmds = (
        r"""
        python3 triton/deploy_model.py \
            --model-repository ${MODEL_REPOSITORY_PATH} \
            --model-path ${SHARED_DIR}/model \
            --model-format ${FORMAT} \
            --model-name ${MODEL_NAME} \
            --model-version 1 \
            --max-batch-size ${MAX_BATCH_SIZE} \
            --precision ${PRECISION} \
            --number-of-model-instances ${NUMBER_OF_MODEL_INSTANCES} \
            --max-queue-delay-us ${TRITON_MAX_QUEUE_DELAY} \
            --preferred-batch-sizes ${TRITON_PREFERRED_BATCH_SIZES} \
            --capture-cuda-graph 0 \
            --backend-accelerator ${BACKEND_ACCELERATOR} \
            --load-model ${TRITON_LOAD_MODEL_METHOD}
        """,
    )
    benchmark.model_deploy(cmds=deployment_cmds)

    # Stage 3: offline performance tests.
    offline_test_cmds = (
        r"""
        python triton/run_offline_performance_test_on_triton.py \
            --server-url ${TRITON_SERVER_URL} \
            --model-name ${MODEL_NAME} \
            --number-of-warmup-iterations 5 \
            --input-data random \
            --batch-sizes ${BATCH_SIZE} \
            --triton-instances ${TRITON_INSTANCES} \
            --result-path ${SHARED_DIR}/triton_performance_offline.csv
        """,
    )
    benchmark.triton_performance_offline_tests(
        cmds=offline_test_cmds,
        result_path="${SHARED_DIR}/triton_performance_offline.csv",
    )

    # Stage 4: online performance tests.
    online_test_cmds = (
        r"""
        python triton/run_online_performance_test_on_triton.py \
            --server-url ${TRITON_SERVER_URL} \
            --model-name ${MODEL_NAME} \
            --number-of-warmup-iterations 5 \
            --input-data random \
            --batch-sizes ${BATCH_SIZE} \
            --triton-instances ${TRITON_INSTANCES} \
            --number-of-model-instances ${NUMBER_OF_MODEL_INSTANCES} \
            --result-path ${SHARED_DIR}/triton_performance_online.csv
        """,
    )
    benchmark.triton_performance_online_tests(
        cmds=online_test_cmds,
        result_path="${SHARED_DIR}/triton_performance_online.csv",
    )

    # The single configuration this benchmark is run with.
    benchmark.configuration(
        precision=precision,
        max_batch_size=max_batch_size,
        format=model_format,
        accelerator=backend_accelerator,
        triton_gpu_engine_count=number_of_model_instances,
        triton_preferred_batch_sizes=preferred_batch_sizes,
        triton_max_queue_delay_us=max_queue_delay_us,
        **kwargs
    )

    DockerExecutor().run(benchmark)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the options, set the log level, echo the
    # parsed arguments and pass them to run_benchmark as keyword arguments.
    parser = argparse.ArgumentParser(description="Run benchmark for model.")
    parser.add_argument(
        "--devices",
        nargs="*",
        required=False,
        help="NVIDIA GPU device ID on which Triton Inference Server is ran. Accept multiple values",
    )
    parser.add_argument(
        "--model-name",
        default="ResNet50",
        required=False,
        help="Model name. Default: ResNet50",
    )
    parser.add_argument(
        "--model-version",
        default="1",
        required=False,
        help="Version of model. Default: 1",
    )
    parser.add_argument(
        "--checkpoint",
        default="https://api.ngc.nvidia.com/v2/models/nvidia/rn50_tf_amp_ckpt/versions/20.06.0/zip",
        required=False,
        help="Checkpoint url. Default: https://api.ngc.nvidia.com/v2/models/nvidia/rn50_tf_amp_ckpt/versions/20.06.0/zip",
    )
    parser.add_argument(
        "--container-version",
        default="20.12",
        required=False,
        help="Version of container for Triton Inference Server. Default: 20.12",
    )
    parser.add_argument(
        "--model-format",
        default="tf-savedmodel",
        choices=AVAILABLE_MODEL_FORMATS,
        required=False,
        help="Format of exported model. Default: tf-savedmodel",
    )
    parser.add_argument(
        "--precision",
        type=str,
        default="fp16",
        choices=AVAILABLE_MODEL_PRECISIONS,
        required=False,
        help="Model precision (parameter used only by Tensorflow backend with TensorRT optimization). Default: fp16",
    )
    parser.add_argument(
        "--max-batch-size",
        type=int,
        default=32,
        required=False,
        help="Batch size used for benchmark. Maximal batch size which is used to convert model. Default: 32",
    )
    parser.add_argument(
        "--number-of-model-instances",
        type=int,
        default=2,
        required=False,
        help="Number of model instances per GPU (model instances). Default: 2",
    )
    parser.add_argument(
        "--preferred-batch-sizes",
        type=int,
        nargs="*",
        required=False,
        help="Batch sizes that the dynamic batching should attempt to create. "
        "In case --max-queue-delay-us is set and this parameter is not, default value will be calculated based on --max-batch-size",
    )
    parser.add_argument(
        "--max-queue-delay-us",
        type=int,
        default=100,
        required=False,
        help="Max delay time which dynamic batch shall wait to form a batch. Default: 100",
    )
    parser.add_argument(
        "--backend-accelerator",
        type=str,
        default="cuda",
        choices=AVAILABLE_MODEL_ACCELERATORS,
        required=False,
        help="Select backend accelerator used for model. Default: cuda",
    )
    parser.add_argument("--verbose", action="store_true", default=False, help="Provide verbose output")

    args = parser.parse_args()
    parsed_options = vars(args)

    # --verbose switches the benchmark logger from INFO to DEBUG.
    LOGGER.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    LOGGER.info("args:")
    for option_name, option_value in parsed_options.items():
        LOGGER.info(f" {option_name} = {option_value}")

    run_benchmark(**parsed_options)
|
135
TensorFlow/Classification/ConvNets/triton/run_inference_on_fw.py
Executable file
|
@ -0,0 +1,135 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
r"""
|
||||
To infer the model on framework runtime, you can use `run_inference_on_fw.py` script.
|
||||
It runs local inference on the data provided by the specified data loader and saves the results into
|
||||
[npz files](https://gitlab-master.nvidia.com/dl/JoC/bermuda-api/-/blob/develop/bermuda_api_toolset/docs/dump_files.md).
|
||||
Those files are stored in the directory specified by the `--output-dir` argument.
|
||||
|
||||
Example call:
|
||||
|
||||
```shell script
|
||||
python ./triton/run_inference_on_fw.py \
|
||||
--input-path /models/exported/model.onnx \
|
||||
--input-type onnx \
|
||||
--dataloader triton/dataloader.py \
|
||||
--data-dir /data/imagenet \
|
||||
--batch-size 32 \
|
||||
--output-dir /results/dump_local \
|
||||
--dump-labels
|
||||
```
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# TensorFlow logging switches, set at import time ahead of the imports below.
# NOTE(review): presumably these must be in place before TensorFlow is first
# imported (indirectly, via the deployment toolkit) - confirm.
os.environ.update(
    {
        "TF_CPP_MIN_LOG_LEVEL": "2",
        "TF_ENABLE_DEPRECATION_WARNINGS": "0",
    }
)
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
# PEP 366: if no package is set (the file was executed directly), name the
# package after the containing directory so the explicit relative imports
# below resolve.
if __package__ is None:
    __package__ = Path(__file__).parent.name
|
||||
|
||||
from .deployment_toolkit.args import ArgParserGenerator
|
||||
from .deployment_toolkit.core import DATALOADER_FN_NAME, BaseLoader, BaseRunner, Format, load_from_file
|
||||
from .deployment_toolkit.dump import NpzWriter
|
||||
from .deployment_toolkit.extensions import loaders, runners
|
||||
|
||||
# Module-level logger; it uses a fixed name rather than __name__.
LOGGER = logging.getLogger("run_inference_on_fw")
|
||||
|
||||
|
||||
def _verify_and_format_dump(args, ids, x, y_pred, y_real):
    """Assemble the keyword arguments for one ``writer.write`` call.

    Args:
        args: Parsed arguments; ``dump_inputs`` and ``dump_labels`` are read.
        ids: Sample identifiers, stored under ``{"ids": {"ids": ids}}``.
        x: Model inputs, included only when ``args.dump_inputs`` is set.
        y_pred: Model outputs, always included as ``"outputs"``.
        y_real: Labels, included only when ``args.dump_labels`` is set.

    Returns:
        Dict with ``"outputs"`` and ``"ids"``, plus optional ``"inputs"`` and
        ``"labels"`` entries.

    Raises:
        ValueError: If labels were requested but ``y_real`` is empty or missing.
    """
    dump = {"outputs": y_pred, "ids": {"ids": ids}}

    if args.dump_inputs:
        dump["inputs"] = x

    if not args.dump_labels:
        return dump

    if not y_real:
        raise ValueError(
            "Found empty label values. Please provide labels in dataloader_fn or do not use --dump-labels argument"
        )
    dump["labels"] = y_real
    return dump
|
||||
|
||||
|
||||
def _parse_and_validate_args():
    """Parse the command line in two passes and return the final namespace.

    The first pass (``parse_known_args``) reads only the fixed options. Its
    ``--dataloader`` and ``--input-type`` values select the dataloader
    function, the loader class and the runner class, and each of these
    extends the parser through ``ArgParserGenerator``. The second pass
    (``parse_args``) parses the complete option set.

    Returns:
        The namespace produced by the second parsing pass.
    """
    # Only model types that have both a runner and a loader can be used.
    supported_inputs = set(runners.supported_extensions) & set(loaders.supported_extensions)

    parser = argparse.ArgumentParser(description="Dump local inference output of given model", allow_abbrev=False)
    parser.add_argument("--input-path", required=True, help="Path to input model")
    parser.add_argument("--input-type", required=True, choices=supported_inputs, help="Input model type")
    parser.add_argument("--dataloader", required=True, help="Path to python file containing dataloader.")
    parser.add_argument("--output-dir", required=True, help="Path to dir where output files will be stored")
    parser.add_argument("--dump-labels", action="store_true", default=False, help="Dump labels to output dir")
    parser.add_argument("--dump-inputs", action="store_true", default=False, help="Dump inputs to output dir")
    parser.add_argument("-v", "--verbose", action="store_true", default=False, help="Verbose logs")

    # First pass: options not registered yet are tolerated and ignored.
    preliminary_args = parser.parse_known_args()[0]

    get_dataloader_fn = load_from_file(preliminary_args.dataloader, label="dataloader", target=DATALOADER_FN_NAME)
    ArgParserGenerator(get_dataloader_fn).update_argparser(parser)

    Loader: BaseLoader = loaders.get(preliminary_args.input_type)
    ArgParserGenerator(Loader, module_path=preliminary_args.input_path).update_argparser(parser)

    Runner: BaseRunner = runners.get(preliminary_args.input_type)
    ArgParserGenerator(Runner).update_argparser(parser)

    # Second pass: the parser now knows every option.
    args = parser.parse_args()

    # The list is empty, so this check currently never triggers.
    types_requiring_io_params = []
    needs_io_params = args.input_type in types_requiring_io_params
    if needs_io_params and not (args.inputs and args.outputs):
        parser.error(f"For {args.input_type} input provide --inputs and --outputs parameters")

    return args
|
||||
|
||||
|
||||
def main():
    """Run local inference on the dataloader's batches and dump the results.

    After argument parsing and logging setup, the loader and runner selected
    by ``--input-type`` are built from the arguments and the model is loaded
    from ``--input-path``. Every batch yielded by the dataloader is then run
    through the inference session and written with ``NpzWriter`` to
    ``--output-dir``.
    """
    args = _parse_and_validate_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )

    LOGGER.info("args:")
    for arg_name, arg_value in vars(args).items():
        LOGGER.info(f" {arg_name} = {arg_value}")

    loader_cls: BaseLoader = loaders.get(args.input_type)
    runner_cls: BaseRunner = runners.get(args.input_type)

    loader = ArgParserGenerator(loader_cls, module_path=args.input_path).from_args(args)
    runner = ArgParserGenerator(runner_cls).from_args(args)

    LOGGER.info(f"Loading {args.input_path}")
    model = loader.load(args.input_path)

    with runner.init_inference(model=model) as runner_session, NpzWriter(args.output_dir) as writer:
        # The dataloader is created only after the inference session is open.
        get_dataloader_fn = load_from_file(args.dataloader, label="dataloader", target=DATALOADER_FN_NAME)
        dataloader_fn = ArgParserGenerator(get_dataloader_fn).from_args(args)
        LOGGER.info("Data loader initialized; Running inference")

        batches = tqdm(dataloader_fn(), unit="batch", mininterval=10)
        for ids, x, y_real in batches:
            y_pred = runner_session(x)
            writer.write(**_verify_and_format_dump(args, ids=ids, x=x, y_pred=y_pred, y_real=y_real))
        LOGGER.info("Inference finished")


if __name__ == "__main__":
    main()
|