DeepLearningExamples/PyTorch/Detection/SSD/qa/qa_utils.py

import json
# terminal stdout colors
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'


# load results and benchmark
def load_json(filepath):
    with open(filepath) as f:
        data = json.load(f)
    return data


def save_json(filepath, data):
    with open(filepath, 'w') as f:
        json.dump(data, f)


# compare func: measured_value passes if it is within pmargin (relative) of true_value from below
def compare(measured_value, true_value, pmargin=0.1):
    assert 0 < pmargin < 1, 'Margin should be in range (0, 1)'
    return (1 - pmargin) * true_value < measured_value
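# Example (hypothetical numbers): with a 10% margin, a measured throughput of 95
# passes against a benchmark value of 100 (95 > 0.9 * 100), while 89 would fail.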


# compare 2 benchmark json files
def compare_benchmarks(results, benchmark, args, pmargin=0.1):
    # sanity check: every measured metric, batch size and GPU count must be present in the benchmark file
    for metric in results['metric_keys']:
        if metric not in benchmark['metric_keys']:
            assert False, "You want to compare {} metric which doesn't appear in benchmark file".format(metric)
    assert len(args.bs) <= len(benchmark['bs']), \
        'len(args.bs) <= len(benchmark["bs"]) ({} <= {})'.format(len(args.bs), len(benchmark['bs']))
    assert len(args.bs) == len(results['bs']), \
        'len(args.bs) == len(results["bs"]) ({} == {})'.format(len(args.bs), len(results['bs']))
    for bs in results['bs']:
        if bs not in benchmark['bs']:
            assert False, "You want to compare batch size = {} which doesn't appear in benchmark file".format(bs)
    assert len(args.ngpus) <= len(benchmark['ngpus']), \
        'len(args.ngpus) <= len(benchmark["ngpus"]) ({} <= {})'.format(len(args.ngpus), len(benchmark['ngpus']))
    assert len(args.ngpus) == len(results['ngpus']), \
        'len(args.ngpus) == len(results["ngpus"]) ({} == {})'.format(len(args.ngpus), len(results['ngpus']))
    for gpu in results['ngpus']:
        if gpu not in benchmark['ngpus']:
            assert False, "You want to compare {} gpus results which don't appear in benchmark file".format(gpu)
    # compare measured numbers with benchmark
    exit_code = 0
    for metric in results['metric_keys']:
        for gpu in results['ngpus']:
            for bs in results['bs']:
                measured_metric = results['metrics'][str(gpu)][str(bs)][metric]
                ground_truth_metric = benchmark['metrics'][str(gpu)][str(bs)][metric]
                ok = compare(measured_metric, ground_truth_metric, pmargin)
                if ok:
                    print(OKGREEN + 'BENCHMARK PASSED: metric={} gpu={} bs={}'.format(metric, gpu, bs) + ENDC)
                else:
                    print(FAIL + 'BENCHMARK NOT PASSED: metric={} gpu={} bs={}'.format(metric, gpu, bs) + ENDC)
                    exit_code = 1
    return exit_code
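
# Expected json layout for compare_benchmarks, inferred from the lookups above
# (the "img/s" metric name and the numbers are illustrative assumptions only):
# {
#     "metric_keys": ["img/s"],
#     "ngpus": [8],
#     "bs": [32],
#     "metrics": {"8": {"32": {"img/s": 1500.0}}}
# }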


# compare accuracy and loss curves from 2 json files (results vs benchmark)
def compare_acc(results, benchmark, args):
    # sanity check: every measured metric, batch size and GPU count must be present in the benchmark file
    for metric in results['metric_keys']:
        if metric not in benchmark['metric_keys']:
            assert False, "You want to compare {} metric which doesn't appear in benchmark file".format(metric)
    for bs in results['bs']:
        if bs not in benchmark['bs']:
            assert False, "You want to compare batch size = {} which doesn't appear in benchmark file".format(bs)
    for gpu in results['ngpus']:
        if gpu not in benchmark['ngpus']:
            assert False, "You want to compare {} gpus results which don't appear in benchmark file".format(gpu)
    # compare measured accuracy with benchmark (10% tolerance)
    for i, (result, ground_truth) in enumerate(zip(results['metrics']['val.acc'], benchmark['metrics']['val.acc'])):
        if i > 43:  # skip early epochs: before the first LR decay accuracy tends to vary by more than 15% around epoch ~30
            if ground_truth * 0.9 > result:
                print(FAIL + 'ACCURACY TEST NOT PASSED' + ENDC)
                return 1
    # compare measured loss with benchmark (10% tolerance)
    for i, (result, ground_truth) in enumerate(zip(results['metrics']['train.loss'], benchmark['metrics']['train.loss'])):
        if i > 43:
            if ground_truth * 1.1 < result:
                print(FAIL + 'LOSS TEST NOT PASSED' + ENDC)
                return 1
    print(OKGREEN + 'ACCURACY TEST PASSED' + ENDC)
    return 0


def create_json_file(val_acc_results, train_loss_results, ngpus=8, bs=32):
    results = {"ngpus": [ngpus],
               "bs": [bs],
               "metric_keys": ["train.loss", "val.acc"],
               "metrics": {
                   "train.loss": [],
                   "val.acc": []
               }}
    # both inputs are expected as lists of (epoch, value) pairs with matching, 0-based epoch indices
    for i, ((epoch1, acc), (epoch2, loss)) in enumerate(zip(val_acc_results, train_loss_results)):
        assert i == epoch1 == epoch2
        results['metrics']['train.loss'].append(loss)
        results['metrics']['val.acc'].append(acc)
    return results
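

# Minimal usage sketch (illustrative only; the file names and the `args` namespace
# below are assumptions, not part of this repo's QA scripts):
#
#     from argparse import Namespace
#     results = load_json('results.json')
#     benchmark = load_json('benchmark_baseline.json')
#     args = Namespace(bs=results['bs'], ngpus=results['ngpus'])
#     exit_code = compare_benchmarks(results, benchmark, args, pmargin=0.1)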