[SSD/PyT] Fixes

Andrei Shumak 2021-08-30 12:47:32 -07:00 committed by Krzysztof Kudrynski
parent 248927e6fd
commit 58df33d1a2
3 changed files with 9 additions and 10 deletions

Changed file 1 of 3

@@ -169,7 +169,7 @@ def train(train_loop_func, logger, args):
     loss_func.cuda()
     optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate,
-                                    momentum=args.momentum, weight_decay=args.weight_decay)
+                                momentum=args.momentum, weight_decay=args.weight_decay)
     scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep, gamma=0.1)
     if args.amp:
         ssd300, optimizer = amp.initialize(ssd300, optimizer, opt_level='O2')
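For context, tencent_trick builds SGD parameter groups so that weight decay is skipped for biases and BatchNorm parameters (the "Tencent trick"). A minimal sketch of the idea, not the repository's exact implementation:

    import torch

    def tencent_trick_sketch(model):
        # Skip weight decay for 1D parameters (biases, BatchNorm weights);
        # apply it to everything else. SGD reads per-group overrides, so
        # groups without a 'weight_decay' key inherit the optimizer default.
        decay, no_decay = [], []
        for param in model.parameters():
            if not param.requires_grad:
                continue
            (no_decay if param.dim() == 1 else decay).append(param)
        return [{'params': no_decay, 'weight_decay': 0.0},
                {'params': decay}]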
@@ -198,8 +198,8 @@ def train(train_loop_func, logger, args):
         acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)
         if args.local_rank == 0:
             print('Model precision {} mAP'.format(acc))
         return
     mean, std = generate_mean_std(args)
     for epoch in range(start_epoch, args.epochs):

Changed file 2 of 3

@@ -93,6 +93,7 @@ class Logger:
         DLLogger.log((), self.summary)

     def update_iter(self, epoch, iteration, loss):
         self.epoch = epoch
+        self.train_iter = iteration
         self.train_loss_logger.update_iter(loss)
         if iteration % self.log_interval == 0:
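The one-line fix records the current iteration on the logger so that later reporting (e.g., an epoch summary) can say how far training got. A toy stand-in for the pattern, not the repository's Logger:

    class MiniLogger:
        def __init__(self, log_interval=20):
            self.log_interval = log_interval
            self.epoch = 0
            self.train_iter = 0
            self.losses = []

        def update_iter(self, epoch, iteration, loss):
            self.epoch = epoch
            self.train_iter = iteration   # the line this commit adds
            self.losses.append(loss)
            if iteration % self.log_interval == 0:
                avg = sum(self.losses) / len(self.losses)
                print('epoch {} iter {}: mean loss {:.4f}'.format(epoch, iteration, avg))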

Changed file 3 of 3

@@ -20,7 +20,6 @@ from SSD import _C as C
 from apex import amp

 def train_loop(model, loss_func, epoch, optim, train_dataloader, val_dataloader, encoder, iteration, logger, args, mean, std):
-    # for nbatch, (img, _, img_size, bbox, label) in enumerate(train_dataloader):
     for nbatch, data in enumerate(train_dataloader):
         img = data[0][0][0]
         bbox = data[0][1][0]
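The nested data[0][1][0] indexing reflects the DALI-style iterator output: a list with one entry per pipeline, each entry holding the pipeline's output slots. A self-contained toy batch showing how the indices line up (shapes and structure are illustrative assumptions):

    import torch

    N = 2   # per-GPU batch size (illustrative)
    # Assumed layout: [pipeline][output slot][tensor] -> images, boxes, labels
    data = [((torch.zeros(N, 3, 300, 300),),
             (torch.zeros(N * 8732, 4),),
             (torch.zeros(N * 8732),))]

    img = data[0][0][0]     # images for this pipeline
    bbox = data[0][1][0]    # boxes, flattened across the batch
    label = data[0][2][0]   # labels, flattened across the batch
    print(img.shape, bbox.shape, label.shape)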
@@ -82,8 +81,8 @@ def benchmark_train_loop(model, loss_func, epoch, optim, train_dataloader, val_dataloader, encoder, iteration, logger, args, mean, std):
     start_time = None
     # tensor for results
     result = torch.zeros((1,)).cuda()
-    for i, data in enumerate(loop(train_dataloader)):
-        if i >= args.benchmark_warmup:
+    for nbatch, data in enumerate(loop(train_dataloader)):
+        if nbatch >= args.benchmark_warmup:
             torch.cuda.synchronize()
             start_time = time.time()
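The warmup/synchronize pattern matters because CUDA kernels launch asynchronously: without torch.cuda.synchronize() the host clock would be read before the GPU work actually finished. A generic, self-contained version of the pattern (hypothetical helper, not from the repository):

    import time
    import torch

    def benchmark(step, warmup=5, iters=20):
        # Run `step` warmup+iters times; time only the post-warmup steps,
        # synchronizing so pending GPU work is included in each measurement.
        times = []
        for i in range(warmup + iters):
            if i >= warmup:
                torch.cuda.synchronize()
                start = time.time()
            step()
            if i >= warmup:
                torch.cuda.synchronize()
                times.append(time.time() - start)
        return sum(times) / len(times)

    x = torch.randn(1024, 1024, device='cuda')
    print('avg step (s):', benchmark(lambda: x @ x))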
@@ -109,6 +108,7 @@ def benchmark_train_loop(model, loss_func, epoch, optim, train_dataloader, val_dataloader, encoder, iteration, logger, args, mean, std):
             continue
         bbox, label = C.box_encoder(N, bbox, bbox_offsets, label, encoder.dboxes.cuda(), 0.5)
+        # output is ([N*8732, 4], [N*8732]), need [N, 8732, 4], [N, 8732] respectively
         M = bbox.shape[0] // N
         bbox = bbox.view(N, M, 4)
         label = label.view(N, M)
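The arithmetic behind the reshape: the box encoder returns one row per anchor across the whole batch, so dividing the row count by the batch size N recovers the per-image anchor count (8732 default boxes for SSD300), and view restores the batch dimension:

    import torch

    N = 4                                   # batch size
    bbox = torch.rand(N * 8732, 4)          # flattened encoder output
    label = torch.zeros(N * 8732, dtype=torch.long)

    M = bbox.shape[0] // N                  # 8732 anchors per image
    bbox = bbox.view(N, M, 4)               # -> [N, 8732, 4]
    label = label.view(N, M)                # -> [N, 8732]
    print(M, tuple(bbox.shape), tuple(label.shape))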
@@ -141,13 +141,12 @@ def benchmark_train_loop(model, loss_func, epoch, optim, train_dataloader, val_dataloader, encoder, iteration, logger, args, mean, std):
         optim.step()
         optim.zero_grad()
-        if i >= args.benchmark_warmup + args.benchmark_iterations:
+        if nbatch >= args.benchmark_warmup + args.benchmark_iterations:
             break
-        if i >= args.benchmark_warmup:
+        if nbatch >= args.benchmark_warmup:
             torch.cuda.synchronize()
-            logger.update(args.batch_size, time.time() - start_time)
+            logger.update(args.batch_size*args.N_gpu, time.time() - start_time)
     result.data[0] = logger.print_result()
     if args.N_gpu > 1:
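This hunk carries the substantive fix: with N_gpu processes each stepping through its own batch, the samples counted per iteration must be batch_size * N_gpu, otherwise the reported FPS is low by a factor of the GPU count. The arithmetic, with hypothetical example numbers:

    # Each of n_gpu ranks processes batch_size images per step, so
    # global throughput = batch_size * n_gpu / step_time.
    batch_size, n_gpu, step_time = 32, 8, 0.125   # illustrative values
    fps = batch_size * n_gpu / step_time
    print('{:.0f} FPS'.format(fps))               # 2048 FPS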
@@ -156,7 +155,6 @@ def benchmark_train_loop(model, loss_func, epoch, optim, train_dataloader, val_dataloader, encoder, iteration, logger, args, mean, std):
         print('Training performance = {} FPS'.format(float(result.data[0])))

 def loop(dataloader, reset=True):
     while True:
         for data in dataloader:
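The diff cuts loop off mid-body. For context, a sketch of the pattern such a helper usually implements: cycle over the dataloader indefinitely, resetting it between passes, since DALI-style iterators require an explicit reset(). This is an assumption about the body, not the commit's verbatim code:

    def loop(dataloader, reset=True):
        while True:
            for data in dataloader:
                yield data
            if reset:
                dataloader.reset()   # assumed: DALI-style iterators need this between epochs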