# Source code for template.runner.bidimensional.bidimensional

"""
This file is the template for the boilerplate of train/test of a DNN on a bidimensional dataset
In particular, it is designed to work with clouds of bi-dimensional points.
"""

# Utils
import logging
import sys
import numpy as np

# Torch
import torch
from torch import nn

# DeepDIVA
import models

# Delegated
from template.runner.image_classification import ImageClassification, evaluate, train
from template.setup import set_up_model, set_up_dataloaders
from util.misc import checkpoint, adjust_learning_rate
from util.visualization.decision_boundaries import plot_decision_boundaries


#######################################################################################################################
class Bidimensional(ImageClassification):
    """
    Runner for bidimensional (2D point-cloud) datasets.

    Reuses the image-classification train/validate/test logic but additionally
    renders a "decision boundaries" visualization of the model over a dense
    grid covering the validation points.
    """

    @staticmethod
    def single_run(writer, current_log_folder, model_name, epochs, lr, decay_lr,
                   validation_interval, checkpoint_all_epochs, **kwargs):
        """
        This is the main routine where train(), validate() and test() are called.

        Parameters
        ----------
        writer : Tensorboard.SummaryWriter
            Responsible for writing logs in Tensorboard compatible format.
        current_log_folder : string
            Path to where logs/checkpoints are saved
        model_name : string
            Name of the model
        epochs : int
            Number of epochs to train
        lr : float
            Value for learning rate
        decay_lr : boolean
            Decay the lr flag
        validation_interval : int
            Run evaluation on validation set every N epochs
        checkpoint_all_epochs : bool
            If enabled, save checkpoint after every epoch.
        kwargs : dict
            Any additional arguments.

        Returns
        -------
        train_value : ndarray[floats] of size (1, `epochs`)
            Accuracy values for train split
        val_value : ndarray[floats] of size (1, `epochs`+1)
            Accuracy values for validation split
        test_value : float
            Accuracy value for test split
        """
        # Get the selected model and make sure it really is a 2D-input model.
        model_expected_input_size = models.__dict__[model_name]().expected_input_size
        Bidimensional._validate_model_input_size(model_expected_input_size, model_name)
        logging.info('Model {} expects input size of {}'.format(model_name, model_expected_input_size))

        # Setting up the dataloaders
        train_loader, val_loader, test_loader, num_classes = set_up_dataloaders(model_expected_input_size, **kwargs)

        # Setting up model, optimizer, criterion
        model, criterion, optimizer, best_value, start_epoch = set_up_model(num_classes=num_classes,
                                                                            model_name=model_name,
                                                                            lr=lr,
                                                                            train_loader=train_loader,
                                                                            **kwargs)

        # Core routine
        logging.info('Begin training')
        # val_value has one extra slot: index -1 holds the pre-training validation run.
        val_value = np.zeros((epochs + 1 - start_epoch))
        train_value = np.zeros((epochs - start_epoch))

        # Collect all validation points so the plotting grid can span their bounding box.
        grid_resolution = 100
        mini_batches = np.array([input_mini_batch.numpy() for input_mini_batch, _ in val_loader])
        val_coords = np.squeeze(np.array([sample for mini_batch in mini_batches for sample in mini_batch]))

        min_x, min_y = np.min(val_coords[:, 0]), np.min(val_coords[:, 1])
        max_x, max_y = np.max(val_coords[:, 0]), np.max(val_coords[:, 1])
        coords = np.array([[x, y]
                           for x in np.linspace(min_x, max_x, grid_resolution)
                           for y in np.linspace(min_y, max_y, grid_resolution)
                           ])
        coords = torch.autograd.Variable(torch.from_numpy(coords).type(torch.FloatTensor))

        if not kwargs['no_cuda']:
            coords = coords.cuda(non_blocking=True)

        # PLOT: decision boundary before any training (epoch == -1).
        Bidimensional._evaluate_and_plot_decision_boundary(model=model, val_coords=val_coords, coords=coords,
                                                           grid_resolution=grid_resolution, val_loader=val_loader,
                                                           num_classes=num_classes, writer=writer, epoch=-1,
                                                           epochs=epochs, **kwargs)
        val_value[-1] = Bidimensional._validate(val_loader, model, criterion, writer, -1, **kwargs)

        # Add model parameters to Tensorboard
        for name, param in model.named_parameters():
            writer.add_histogram(name + '_-1', param.clone().cpu().data.numpy(), -1, bins='auto')

        for epoch in range(start_epoch, epochs):
            # Train
            train_value[epoch] = Bidimensional._train(train_loader, model, criterion, optimizer, writer, epoch,
                                                      **kwargs)
            # Validate
            if epoch % validation_interval == 0:
                val_value[epoch] = Bidimensional._validate(val_loader, model, criterion, writer, epoch, **kwargs)
            if decay_lr is not None:
                adjust_learning_rate(lr=lr, optimizer=optimizer, epoch=epoch, decay_lr_epochs=decay_lr)
            best_value = checkpoint(epoch=epoch, new_value=val_value[epoch], best_value=best_value, model=model,
                                    optimizer=optimizer, log_dir=current_log_folder,
                                    checkpoint_all_epochs=checkpoint_all_epochs)

            # PLOT: decision boundary routine
            Bidimensional._evaluate_and_plot_decision_boundary(model=model, val_coords=val_coords, coords=coords,
                                                               grid_resolution=grid_resolution, val_loader=val_loader,
                                                               num_classes=num_classes, writer=writer, epoch=epoch,
                                                               epochs=epochs, **kwargs)

            # Add model parameters to Tensorboard
            for name, param in model.named_parameters():
                writer.add_histogram(name + '_{}'.format(epoch), param.clone().cpu().data.numpy(), epoch,
                                     bins='auto')

        # Test
        test_value = Bidimensional._test(test_loader, model, criterion, writer, epochs, **kwargs)
        logging.info('Training completed')

        return train_value, val_value, test_value

    ####################################################################################################################
    @staticmethod
    def _validate_model_input_size(model_expected_input_size, model_name):
        """
        This method verifies that the model expected input size is the int 2.
        This is necessary to avoid confusion with models which run on other types of data.

        Parameters
        ----------
        model_expected_input_size
            The item retrieved from the model which corresponds to the expected input size
        model_name : String
            Name of the model (logging purpose only)

        Returns
        -------
        None
        """
        # BUGFIX: the original used `is not 2`, an identity comparison with an int
        # literal that only works by accident of CPython's small-int caching (and is
        # a SyntaxWarning on modern Python). Use a proper equality/type check instead.
        if not isinstance(model_expected_input_size, int) or model_expected_input_size != 2:
            logging.error('Model {model_name} expected input size is not bidimensional (2). '
                          'Received: {model_expected_input_size}'
                          .format(model_name=model_name,
                                  model_expected_input_size=model_expected_input_size))
            sys.exit(-1)

    @staticmethod
    def _evaluate_and_plot_decision_boundary(model, val_coords, coords, grid_resolution, val_loader, num_classes,
                                             writer, epoch, no_cuda, epochs, **kwargs):
        """
        This routine is responsible for creating the visualization "decision boundaries".
        See https://diva-dia.github.io/DeepDIVAweb/articles/visualize-results/ for more
        details about it.

        Parameters
        ----------
        model : nn.module
            The model
        val_coords : list
            List of all validation points
        coords : torch.autograd.Variable
            List of all the points to be evaluated on the grid
        grid_resolution : int
            How many points per axis on the grid
        val_loader : torch.utils.data.DataLoader
            The dataloader of the validation set (to extract the label of the points)
        num_classes : int
            Number of classes (mainly for coloring purposes)
        writer : Tensorboard.SummaryWriter
            Responsible for writing logs in Tensorboard compatible format.
        epoch : int
            The current epoch
        epochs : int
            Number of the epoch
        no_cuda : boolean
            Specifies whether the GPU should be used or not. A value of 'True' means the
            CPU will be used.

        Returns
        -------
        None
        """
        # Look for the extreme boundaries of the validation set
        min_x, min_y = np.min(val_coords[:, 0]), np.min(val_coords[:, 1])
        max_x, max_y = np.max(val_coords[:, 0]), np.max(val_coords[:, 1])

        # Create a list of points in a grid fashion
        grid_x = np.linspace(min_x, max_x, grid_resolution)
        grid_y = np.linspace(min_y, max_y, grid_resolution)
        grid_x, grid_y = np.meshgrid(grid_x, grid_y)

        # Softmax for deciding the final color of the prediction.
        # NOTE(review): dim=0 normalizes across the batch of grid points, not across the
        # classes of each point (which would be dim=1). Winners (argmax per row) are
        # unaffected, but the confidence shading may be — confirm the intended axis.
        sm = nn.Softmax(dim=0)

        # Forward pass on the points
        if not no_cuda:
            outputs = model(coords)
            outputs = sm(outputs)
            outputs = outputs.data.cpu().numpy()
        else:
            outputs = sm(model(coords)).data.numpy()

        # Get class and confidence for each point (vectorized; outputs is (N, num_classes)).
        output_winners = np.argmax(outputs, axis=1)
        outputs_confidence = outputs[np.arange(len(output_winners)), output_winners]

        # Create plot
        plot_decision_boundaries(output_winners=output_winners,
                                 output_confidence=outputs_confidence,
                                 grid_x=grid_x, grid_y=grid_y,
                                 point_x=val_coords[:, 0], point_y=val_coords[:, 1],
                                 point_class=val_loader.dataset.data[:, 2],
                                 num_classes=num_classes, step=epoch, writer=writer, epochs=epochs, **kwargs)

    ####################################################################################################################
    """
    These methods delegate their function to other classes in the image_classification
    package. It is useful because sub-classes can selectively change the logic of certain
    parts only.
    """

    @classmethod
    def _train(cls, train_loader, model, criterion, optimizer, writer, epoch, **kwargs):
        # Delegate training to the image-classification implementation.
        return train.train(train_loader, model, criterion, optimizer, writer, epoch, **kwargs)

    @classmethod
    def _validate(cls, val_loader, model, criterion, writer, epoch, **kwargs):
        # Delegate validation to the image-classification implementation.
        return evaluate.validate(val_loader, model, criterion, writer, epoch, **kwargs)

    @classmethod
    def _test(cls, test_loader, model, criterion, writer, epoch, **kwargs):
        # Delegate testing to the image-classification implementation.
        return evaluate.test(test_loader, model, criterion, writer, epoch, **kwargs)