# DeepLearningExamples/TensorFlow/Translation/GNMT/scripts/parse_log.py

# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import re
import sys
import json
from pathlib import Path
from subprocess import Popen, PIPE

# Decimal number as printed in the log: optional sign, fraction and exponent.
# This replaces the former `(\d|.)+` sub-pattern, whose unescaped `.` matched
# any character (so non-numeric text could reach float()) and whose two
# overlapping alternatives backtracked exponentially on long digit runs.
_NUMBER = rb'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'


def _speed_pattern(prefix):
    """Compile the pattern for a '<prefix>: T mins (S sent/sec, K tokens/sec)' line.

    The compiled pattern has exactly two groups: sentences/sec and tokens/sec.
    """
    return re.compile(
        rb'^' + prefix + rb': ' + _NUMBER
        + rb' mins \((' + _NUMBER + rb') sent/sec, (' + _NUMBER + rb') tokens/sec\)',
        re.MULTILINE)


# `^` with re.MULTILINE anchors each record at the start of a line, which also
# covers a record on the very first line of the file.
_BLEU_RE = re.compile(rb'^bleu is (' + _NUMBER + rb')', re.MULTILINE)
_TRAIN_SPEED_RE = _speed_pattern(rb'training time for epoch \d+')
_EVAL_EPOCH_SPEED_RE = _speed_pattern(rb'eval time for epoch \d+')
_EVAL_CKPT_SPEED_RE = _speed_pattern(rb'eval time for ckpt')
_DURATION_RE = re.compile(rb'^Experiment took (' + _NUMBER + rb') min', re.MULTILINE)


def parse_log(content):
    """Extract BLEU scores, throughput figures and total duration from a log.

    Args:
        content: raw bytes of the log file.

    Returns:
        A dict with the keys 'bleu', 'training_tokens_per_sec',
        'training_sentences_per_sec', 'eval_tokens_per_sec',
        'eval_sentences_per_sec' (lists of floats, in log order) and
        'duration' (a float taken from the first 'Experiment took' line).

    Raises:
        ValueError: if the log has no 'Experiment took <number> min' line.
    """
    training_speed = _TRAIN_SPEED_RE.findall(content)

    # Per-checkpoint eval lines are only consulted when the log contains no
    # per-epoch eval lines at all.
    eval_speed = (_EVAL_EPOCH_SPEED_RE.findall(content)
                  or _EVAL_CKPT_SPEED_RE.findall(content))

    duration = _DURATION_RE.search(content)
    if duration is None:
        raise ValueError("no 'Experiment took <number> min' line found")

    # Key order is kept as-is so the emitted JSON layout does not change.
    return {
        'bleu': [float(score) for score in _BLEU_RE.findall(content)],
        'training_tokens_per_sec': [float(tokens) for _, tokens in training_speed],
        'training_sentences_per_sec': [float(sents) for sents, _ in training_speed],
        'eval_tokens_per_sec': [float(tokens) for _, tokens in eval_speed],
        'eval_sentences_per_sec': [float(sents) for sents, _ in eval_speed],
        'duration': float(duration.group(1)),
    }


def main():
    """Parse the log file named on the command line and print the metrics as JSON."""
    parser = argparse.ArgumentParser(description='Parse training logs')
    parser.add_argument('log', help='path to log file', type=Path)
    args = parser.parse_args()

    try:
        ret = parse_log(args.log.read_bytes())
    except ValueError as err:
        # Still a failing exit status, but with a readable message instead of
        # an IndexError traceback.
        sys.exit('{}: {}'.format(args.log, err))

    print(json.dumps(ret))


if __name__ == '__main__':
    main()