# DeepLearningExamples/PyTorch/Recommendation/NCF/logger/analyzer.py
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from collections import defaultdict
import json

from logger import logger as nvl
from logger.parser import NVLogParser
from logger import tags


def collect_by_scope(loglines):
    # dicts gathering results per scope
    run_stats = {}
    epoch_stats = {}
    iteration_stats = {}
    # gather all lines with run-scope events & variables, keyed by tag
    run_events = {l.tag: l for l in loglines if l.scope == nvl.RUN_SCOPE}
    # gather all tags that carry a value
    run_variables = {tag: line for tag, line in run_events.items() if line.value is not None}
    # find all timed block names by stripping the '_start' suffix
    timed_blocks = [k[:-len('_start')] for k in run_events if k.endswith('_start')]
    # measure elapsed times for the run scope
    for prefix in timed_blocks:
        # only when the matching stop event is also present
        if prefix + "_stop" in run_events:
            start = run_events[prefix + "_start"].timestamp
            stop = run_events[prefix + "_stop"].timestamp
            run_stats[prefix + "_time"] = stop - start
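    # e.g. a run-scope 'train_start'/'train_stop' event pair would produce
    # run_stats['train_time'] (tag names here are illustrative, not prescribed)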
    # collect all variables, flattening nested dict payloads
    for k in run_variables:
        e = run_events[k]
        if isinstance(e.value, dict):
            for d, v in e.value.items():
                run_stats[k + "_" + d] = v
        else:
            run_stats[k] = e.value
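    # e.g. a hypothetical tag 't' with dict value {'a': 1, 'b': 2} becomes
    # run_stats['t_a'] = 1 and run_stats['t_b'] = 2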
    # find all epochs seen in the log
    epochs = sorted({int(l.epoch) for l in loglines if int(l.epoch) >= 0})
    epoch_stats['x'] = epochs
    # gather eval_accuracy; each EVAL_ACCURACY line carries a dict payload,
    # from which only the scalar 'value' field is kept
    eval_accuracy_dicts = [l.value for l in loglines if l.tag == tags.EVAL_ACCURACY]
    eval_accuracy = [d['value'] for d in eval_accuracy_dicts]
    epoch_stats['eval_accuracy'] = eval_accuracy
    # gather iterations-per-second performance readings
    eval_it_per_sec = [l.value for l in loglines if l.tag == tags.PERF_IT_PER_SEC]
    epoch_stats['it_per_sec'] = eval_it_per_sec
    # gather all (epoch, iteration) tuples
    all_iterations = {(int(l.epoch), int(l.iteration)) for l in loglines if int(l.iteration) >= 0}
    # group iteration indices by epoch
    collected_iterations = defaultdict(list)
    for epoch, iteration in all_iterations:
        collected_iterations[epoch].append(iteration)
    # convert to a list of sorted per-epoch lists
    iterations = [sorted(collected_iterations[k]) for k in sorted(collected_iterations.keys())]
    iteration_stats['x'] = iterations
    # gather all (epoch, iteration, loss) triples
    all_loss_dicts = [l.value for l in loglines if l.tag == tags.TRAIN_ITERATION_LOSS]
    all_loss = {(l['epoch'], l['iteration'], l['value']) for l in all_loss_dicts}
    # group by epoch, keeping (iteration, loss) pairs so the loss values can be
    # ordered by iteration rather than by loss magnitude
    collected_loss = defaultdict(list)
    for epoch, iteration, value in all_loss:
        collected_loss[epoch].append((iteration, value))
    # convert to a list of per-epoch loss lists, ordered by iteration so they
    # stay aligned with iteration_stats['x']
    iterations_loss = [[v for _, v in sorted(collected_loss[k])] for k in sorted(collected_loss.keys())]
    iteration_stats['loss'] = iterations_loss
    # find epoch-scope events and variables (collected here for inspection,
    # but not yet folded into the returned stats)
    epoch_events = [l for l in loglines if l.scope == nvl.EPOCH_SCOPE]
    epoch_event_names = {l.tag for l in epoch_events}
    epoch_timed_blocks = {k[:-len('_start')] for k in epoch_event_names if k.endswith('_start')}
    epoch_variables = {l.tag for l in epoch_events if l.value is not None}
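    # A hypothetical shape of the returned structure, derived from the
    # collection logic above (the keys inside each scope depend on the log):
    #   {"run":   {"train_time": 1234.5, ...},
    #    "epoch": {"x": [0, 1, ...], "eval_accuracy": [...], "it_per_sec": [...]},
    #    "iter":  {"x": [[0, 1, ...], ...], "loss": [[...], ...]}}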
return {"run" : run_stats, "epoch": epoch_stats, "iter" : iteration_stats}
def analyze(input_path, output_path=None):
    parser = NVLogParser()
    loglines, errors, worker_loglines = parser.parse_file(input_path)
    # analyze the log lines of worker '(0)'
    stats = collect_by_scope(worker_loglines['(0)'])
    if not output_path:
        print(json.dumps(stats, indent=4))
    else:
        with open(output_path, 'w') as f:
            json.dump(obj=stats, fp=f, indent=4)
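
# Example usage (a minimal sketch; the log file path is hypothetical):
#
#   from logger.analyzer import analyze
#   analyze('ncf_training.log')                            # print stats to stdout
#   analyze('ncf_training.log', output_path='stats.json')  # write stats to a file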


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('usage: analyzer.py FILENAME')
        print('       runs the analyzer on the given log file')
        sys.exit(1)
    analyze(input_path=sys.argv[1], output_path=None)