Source code for template.runner.multi_label_image_classification.evaluate

# Utils
import logging
import time
import warnings

import numpy as np
# Torch related stuff
import torch
from sklearn.metrics import confusion_matrix, classification_report
from tqdm import tqdm

from util.evaluation.metrics import accuracy
# DeepDIVA
from util.misc import AverageMeter, _prettyprint_logging_label, save_image_and_log_to_tensorboard
from util.visualization.confusion_matrix_heatmap import make_heatmap


def validate(val_loader, model, criterion, writer, epoch, no_cuda=False, log_interval=20, **kwargs):
    """Wrapper for _evaluate() with the intent to validate the model."""
    return _evaluate(val_loader, model, criterion, writer, epoch, 'val', no_cuda, log_interval, **kwargs)
def test(test_loader, model, criterion, writer, epoch, no_cuda=False, log_interval=20, **kwargs):
    """Wrapper for _evaluate() with the intent to test the model."""
    return _evaluate(test_loader, model, criterion, writer, epoch, 'test', no_cuda, log_interval, **kwargs)
def _evaluate(data_loader, model, criterion, writer, epoch, logging_label, no_cuda=False, log_interval=10, **kwargs):
    """
    The evaluation routine.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. It's prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    jss_epoch : float
        Jaccard similarity score of the model on the evaluated split
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such)
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    pbar = tqdm(enumerate(data_loader), total=len(data_loader), unit='batch', ncols=150, leave=False)
    with torch.no_grad():
        for batch_idx, (input, target) in pbar:
            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU
            if not no_cuda:
                input = input.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Compute output
            output = model(input)

            # Compute and record the loss
            loss = criterion(output, target)
            losses.update(loss.item(), input.size(0))

            # Apply sigmoid and take everything above a threshold of 0.5
            squashed_output = torch.nn.Sigmoid()(output).data.cpu().numpy()
            target_vals = target.cpu().numpy().astype(int)

            # jss = compute_jss(target_vals, get_preds_from_minibatch(squashed_output))
            # top1.update(jss, input.size(0))

            # Store results of each minibatch
            _ = [preds.append(item) for item in get_preds_from_minibatch(squashed_output)]
            _ = [targets.append(item) for item in target.cpu().numpy()]

            # Add loss and accuracy to Tensorboard
            if multi_run is None:
                writer.add_scalar(logging_label + '/mb_loss', loss.item(),
                                  epoch * len(data_loader) + batch_idx)
                # writer.add_scalar(logging_label + '/mb_jaccard_similarity', jss,
                #                   epoch * len(data_loader) + batch_idx)
            else:
                writer.add_scalar(logging_label + '/mb_loss_{}'.format(multi_run), loss.item(),
                                  epoch * len(data_loader) + batch_idx)
                # writer.add_scalar(logging_label + '/mb_jaccard_similarity_{}'.format(multi_run), jss,
                #                   epoch * len(data_loader) + batch_idx)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, len(data_loader)))
                pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                                 Loss='{loss.avg:.4f}\t'.format(loss=losses),
                                 # JSS='{top1.avg:.3f}\t'.format(top1=top1),
                                 Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Generate a classification report for each epoch
    targets = np.array(targets).astype(int)
    preds = np.array(preds).astype(int)
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    jss_epoch = compute_jss(targets, preds)
    # try:
    #     np.testing.assert_approx_equal(jss_epoch, top1.avg)
    # except:
    #     logging.error('Computed JSS scores do not match')
    #     logging.error('JSS: {} Avg: {}'.format(jss_epoch, top1.avg))

    # Logging the epoch-wise JSS
    if multi_run is None:
        writer.add_scalar(logging_label + '/loss', losses.avg, epoch)
        writer.add_scalar(logging_label + '/jaccard_similarity', jss_epoch, epoch)
    else:
        writer.add_scalar(logging_label + '/loss_{}'.format(multi_run), losses.avg, epoch)
        writer.add_scalar(logging_label + '/jaccard_similarity_{}'.format(multi_run), jss_epoch, epoch)

    logging.info(_prettyprint_logging_label(logging_label) +
                 ' epoch[{}]: '
                 'JSS={jss_epoch:.3f}\t'
                 'Loss={loss.avg:.4f}\t'
                 'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, jss_epoch=jss_epoch))

    return jss_epoch
def get_preds_from_minibatch(minibatch):
    """Binarize a mini-batch of sigmoid outputs with a fixed threshold of 0.5."""
    preds = []
    for row in minibatch:
        tmp = [1 if item > 0.5 else 0 for item in row]
        preds.append(tmp)
    preds = np.array(preds).astype(int)
    return preds
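# Illustrative example (not part of the original module): thresholding a
# mini-batch of sigmoid outputs at 0.5 turns class probabilities into a
# binary multi-label prediction matrix.
#
#   >>> get_preds_from_minibatch(np.array([[0.9, 0.2, 0.6],
#   ...                                    [0.1, 0.4, 0.8]]))
#   array([[1, 0, 1],
#          [0, 0, 1]])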
def compute_jss(target, preds):
    """Compute the Jaccard similarity score (JSS) averaged over all classes."""
    score = 0
    num_classes = len(target[0])
    for i in range(num_classes):
        score += jaccard_similarity_score(target[:, i], preds[:, i])
    score = score / num_classes
    return score
def jaccard_similarity_score(targets, preds):
    """Compute the Jaccard similarity (intersection over union of positive labels) of two binary vectors."""
    assert len(targets) == len(preds)
    assert len(targets.shape) == 1
    assert len(preds.shape) == 1
    locs_targets = set(np.where(targets == 1)[0])
    locs_preds = set(np.where(preds == 1)[0])
    try:
        score = len(locs_targets.intersection(locs_preds)) / len(locs_targets.union(locs_preds))
    except ZeroDivisionError:
        # Neither the targets nor the predictions contain any positive labels:
        # the union is empty, so treat this as a perfect match.
        score = 1.0
    return score
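# Illustrative example (not part of the original module): compute_jss()
# averages the per-class Jaccard score, where each class contributes
# |targets ∩ preds| / |targets ∪ preds| over the positive positions.
#
#   >>> t = np.array([[1, 0], [1, 1], [0, 1]])
#   >>> p = np.array([[1, 0], [0, 1], [0, 1]])
#   >>> compute_jss(t, p)   # class 0: 1/2 = 0.5, class 1: 2/2 = 1.0
#   0.75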
def _log_classification_report(data_loader, epoch, preds, targets, writer):
    """
    This routine computes and prints on Tensorboard TEXT a classification report with F1 score,
    Precision, Recall and similar metrics computed per-class.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    epoch : int
        Number of the epoch (for logging purposes)
    preds : list
        List of all predictions of the model for this epoch
    targets : list
        List of all correct labels for this epoch
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.

    Returns
    -------
    None
    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        classification_report_string = str(classification_report(y_true=targets,
                                                                  y_pred=preds,
                                                                  target_names=[str(item) for item in
                                                                                data_loader.dataset.classes]))

    # Fix for TB writer. It's an ugly workaround to have it printed nicely in the TEXT section of TB.
    classification_report_string = classification_report_string.replace('\n ', '\n\n ')
    classification_report_string = classification_report_string.replace('precision', ' precision', 1)
    classification_report_string = classification_report_string.replace('avg', ' avg', 1)

    writer.add_text('Classification Report for epoch {}\n'.format(epoch), '\n' + classification_report_string, epoch)
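# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original DeepDIVA module).
# It shows one way validate() could be driven outside the regular runner,
# assuming a toy multi-label setup; ToyMultiLabelDataset and the log
# directory below are hypothetical stand-ins for the objects DeepDIVA
# normally builds from its own setup code.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import torch.nn as nn
    from torch.utils.data import Dataset, DataLoader
    from tensorboardX import SummaryWriter

    class ToyMultiLabelDataset(Dataset):
        """Random 8-dimensional inputs with 3 independent binary labels."""
        classes = ['class_0', 'class_1', 'class_2']  # needed by the classification report

        def __init__(self, n=32):
            self.x = torch.randn(n, 8)
            self.y = (torch.rand(n, 3) > 0.5).float()

        def __len__(self):
            return len(self.x)

        def __getitem__(self, index):
            return self.x[index], self.y[index]

    loader = DataLoader(ToyMultiLabelDataset(), batch_size=8)
    model = nn.Linear(8, 3)             # toy multi-label classifier producing raw logits
    criterion = nn.BCEWithLogitsLoss()  # multi-label loss on raw logits
    writer = SummaryWriter('/tmp/toy_multi_label_run')

    jss = validate(loader, model, criterion, writer, epoch=0, no_cuda=True)
    print('Validation JSS: {:.3f}'.format(jss))
    writer.close()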