[SSD/PyT] Fixes
This commit is contained in:
parent
248927e6fd
commit
58df33d1a2
|
@ -169,7 +169,7 @@ def train(train_loop_func, logger, args):
|
|||
loss_func.cuda()
|
||||
|
||||
optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate,
|
||||
momentum=args.momentum, weight_decay=args.weight_decay)
|
||||
momentum=args.momentum, weight_decay=args.weight_decay)
|
||||
scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep, gamma=0.1)
|
||||
if args.amp:
|
||||
ssd300, optimizer = amp.initialize(ssd300, optimizer, opt_level='O2')
|
||||
|
@ -198,8 +198,8 @@ def train(train_loop_func, logger, args):
|
|||
acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)
|
||||
if args.local_rank == 0:
|
||||
print('Model precision {} mAP'.format(acc))
|
||||
|
||||
return
|
||||
|
||||
mean, std = generate_mean_std(args)
|
||||
|
||||
for epoch in range(start_epoch, args.epochs):
|
||||
|
|
|
@ -93,6 +93,7 @@ class Logger:
|
|||
DLLogger.log((), self.summary)
|
||||
|
||||
def update_iter(self, epoch, iteration, loss):
|
||||
self.epoch = epoch
|
||||
self.train_iter = iteration
|
||||
self.train_loss_logger.update_iter(loss)
|
||||
if iteration % self.log_interval == 0:
|
||||
|
|
|
@ -20,7 +20,6 @@ from SSD import _C as C
|
|||
from apex import amp
|
||||
|
||||
def train_loop(model, loss_func, epoch, optim, train_dataloader, val_dataloader, encoder, iteration, logger, args, mean, std):
|
||||
# for nbatch, (img, _, img_size, bbox, label) in enumerate(train_dataloader):
|
||||
for nbatch, data in enumerate(train_dataloader):
|
||||
img = data[0][0][0]
|
||||
bbox = data[0][1][0]
|
||||
|
@ -82,8 +81,8 @@ def benchmark_train_loop(model, loss_func, epoch, optim, train_dataloader, val_d
|
|||
start_time = None
|
||||
# tensor for results
|
||||
result = torch.zeros((1,)).cuda()
|
||||
for i, data in enumerate(loop(train_dataloader)):
|
||||
if i >= args.benchmark_warmup:
|
||||
for nbatch, data in enumerate(loop(train_dataloader)):
|
||||
if nbatch >= args.benchmark_warmup:
|
||||
torch.cuda.synchronize()
|
||||
start_time = time.time()
|
||||
|
||||
|
@ -109,6 +108,7 @@ def benchmark_train_loop(model, loss_func, epoch, optim, train_dataloader, val_d
|
|||
continue
|
||||
bbox, label = C.box_encoder(N, bbox, bbox_offsets, label, encoder.dboxes.cuda(), 0.5)
|
||||
|
||||
# output is ([N*8732, 4], [N*8732]), need [N, 8732, 4], [N, 8732] respectively
|
||||
M = bbox.shape[0] // N
|
||||
bbox = bbox.view(N, M, 4)
|
||||
label = label.view(N, M)
|
||||
|
@ -141,13 +141,12 @@ def benchmark_train_loop(model, loss_func, epoch, optim, train_dataloader, val_d
|
|||
optim.step()
|
||||
optim.zero_grad()
|
||||
|
||||
if i >= args.benchmark_warmup + args.benchmark_iterations:
|
||||
if nbatch >= args.benchmark_warmup + args.benchmark_iterations:
|
||||
break
|
||||
|
||||
if i >= args.benchmark_warmup:
|
||||
if nbatch >= args.benchmark_warmup:
|
||||
torch.cuda.synchronize()
|
||||
logger.update(args.batch_size, time.time() - start_time)
|
||||
|
||||
logger.update(args.batch_size*args.N_gpu, time.time() - start_time)
|
||||
|
||||
result.data[0] = logger.print_result()
|
||||
if args.N_gpu > 1:
|
||||
|
@ -156,7 +155,6 @@ def benchmark_train_loop(model, loss_func, epoch, optim, train_dataloader, val_d
|
|||
print('Training performance = {} FPS'.format(float(result.data[0])))
|
||||
|
||||
|
||||
|
||||
def loop(dataloader, reset=True):
|
||||
while True:
|
||||
for data in dataloader:
|
||||
|
|
Loading…
Reference in a new issue