Source code for template.runner.triplet.evaluate

# Utils
import datetime
import json
import logging
import time
from sklearn.metrics import pairwise_distances_chunked
import numpy as np

# Torch related stuff
import torch
from tqdm import tqdm

# DeepDIVA
from util.evaluation.metrics import compute_mapk


def validate(val_loader, model, writer, epoch, no_cuda=False, log_interval=20, **kwargs):
    """Wrapper around _evaluate_map() with the intent to validate the model."""
    return _evaluate_map(val_loader, model, writer, epoch, 'val', no_cuda, log_interval, **kwargs)
def test(test_loader, model, writer, epoch, no_cuda=False, log_interval=20, **kwargs):
    """Wrapper around _evaluate_map() with the intent to test the model."""
    return _evaluate_map(test_loader, model, writer, epoch, 'test', no_cuda, log_interval, **kwargs)
def _evaluate_map(data_loader, model, writer, epoch, logging_label, no_cuda, log_interval, map, **kwargs):
    """
    The evaluation routine.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.Module
        The network model being used
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the
        tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'val'. It is
        prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True'
        means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 20.
    map : str
        Specifies the value for the mAP computation. Possible values are
        'auto', 'full', or an integer K for AP@K.

    Returns
    -------
    mAP : float
        Mean average precision evaluated on this split
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Switch to evaluate mode (turn off dropout & such)
    model.eval()

    labels, outputs = [], []

    # For use with the multi-crop transform
    multi_crop = False

    # Iterate over the whole evaluation set
    pbar = tqdm(enumerate(data_loader), total=len(data_loader), unit='batch', ncols=150, leave=False)
    with torch.no_grad():
        for batch_idx, (data, label) in pbar:

            # Check if data is provided in multi-crop form and process accordingly
            if len(data.size()) == 5:
                multi_crop = True
                bs, ncrops, c, h, w = data.size()
                data = data.view(-1, c, h, w)

            if not no_cuda:
                data = data.cuda()

            # Compute output
            out = model(data)

            # Average the embeddings of all crops belonging to the same sample
            if multi_crop:
                out = out.view(bs, ncrops, -1).mean(1)

            # Store output
            outputs.append(out.data.cpu().numpy())
            labels.append(label.data.cpu().numpy())

            # Log progress to console
            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label + ' Epoch: {} [{}/{} ({:.0f}%)]'.format(
                    epoch, batch_idx * len(data), len(data_loader.dataset),
                    100. * batch_idx / len(data_loader)))

    # Gather the embeddings and labels of the whole evaluation set
    num_tests = len(data_loader.dataset.file_names)
    labels = np.concatenate(labels, 0).reshape(num_tests)
    outputs = np.concatenate(outputs, 0)

    # Pairwise cosine distances, computed in memory-bounded chunks
    distances = pairwise_distances_chunked(outputs, metric='cosine', n_jobs=16)
    logging.debug('Computed pairwise distances')

    t = time.time()
    mAP, per_class_mAP = compute_mapk(distances, labels, k=map)
    writer.add_text('Per class mAP at epoch {}\n'.format(epoch),
                    json.dumps(per_class_mAP, indent=2, sort_keys=True))
    logging.debug('Completed evaluation of mAP in {}'.format(
        datetime.timedelta(seconds=int(time.time() - t))))

    logging.info('\33[91m ' + logging_label + ' set: mAP: {}\n\33[0m'.format(mAP))

    # Logging the epoch-wise mAP
    if multi_run is None:
        writer.add_scalar(logging_label + '/mAP', mAP, epoch)
    else:
        writer.add_scalar(logging_label + '/mAP{}'.format(multi_run), mAP, epoch)

    return mAP
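The 5-dimensional branch above implements crop averaging: the crops are folded into the batch dimension, the model runs once over all crops, and the per-crop embeddings are averaged back into one embedding per sample. A minimal self-contained sketch of that reshape-and-average trick (all sizes and the toy_model are made up for illustration and are not part of the module above):

import torch
import torch.nn as nn

bs, ncrops, c, h, w = 4, 10, 3, 8, 8
data = torch.randn(bs, ncrops, c, h, w)                # hypothetical multi-crop batch
toy_model = nn.Sequential(nn.Flatten(), nn.Linear(c * h * w, 16))

flat = data.view(-1, c, h, w)                          # (bs * ncrops, c, h, w)
out = toy_model(flat)                                  # (bs * ncrops, 16)
out = out.view(bs, ncrops, -1).mean(1)                 # (bs, 16): one embedding per sample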
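Note that pairwise_distances_chunked returns a generator of row-blocks of the distance matrix rather than the full matrix, which keeps memory bounded on large evaluation sets; the downstream compute_mapk is expected to consume it chunk by chunk. A small illustration of that behavior with random embeddings (the shapes are arbitrary):

import numpy as np
from sklearn.metrics import pairwise_distances_chunked

embeddings = np.random.RandomState(0).randn(1000, 128)

for chunk in pairwise_distances_chunked(embeddings, metric='cosine'):
    # Each chunk is an (n_rows, 1000) slice of the full distance matrix;
    # for small inputs sklearn may yield a single chunk covering all rows.
    print(chunk.shape)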
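A minimal usage sketch for the public entry points, assuming a trained network my_model and an evaluation DataLoader val_loader (both hypothetical names; only validate and its signature come from the module above). The map value is forwarded through **kwargs to _evaluate_map:

from tensorboardX import SummaryWriter

writer = SummaryWriter(log_dir='runs/triplet_eval')    # hypothetical log directory
# 'auto' is one of the documented values for the map parameter
mAP = validate(val_loader=val_loader, model=my_model, writer=writer,
               epoch=0, no_cuda=True, map='auto')
writer.close()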