# Source code for template.runner.apply_model.setup

# Utils
import logging
import os

# Torch related stuff
import torch.nn.parallel
import torch.optim
import torchvision.transforms as transforms

# DeepDIVA
from datasets.image_folder_dataset import ImageFolderApply
from template.runner.triplet.transforms import MultiCrop
from template.setup import _load_mean_std_from_file

def set_up_dataloader(model_expected_input_size, dataset_folder, batch_size, workers, inmem, multi_crop, classify,
                      **kwargs):
    """
    Set up the dataloader used to apply a model to the images of a dataset folder.

    Parameters
    ----------
    model_expected_input_size : tuple
        Specify the height and width that the model expects.
    dataset_folder : string
        Path string that points to the folder containing the images to process.
        Example: ~/../../data/svhn
    batch_size : int
        Number of datapoints to process at once
    workers : int
        Number of workers to use for the dataloader
    inmem : boolean
        Forwarded unchanged to `_load_mean_std_from_file` when reading the dataset statistics.
    multi_crop : int
        If None, the MultiCrop transform is not applied to the data. Otherwise, multi_crop contains an integer
        which specifies how many crops to make from each image.
    classify : boolean
        Specifies whether to generate a classification report for the data or not.
        Forwarded unchanged to `ImageFolderApply`.
    kwargs : dict
        Any additional arguments. `kwargs['runner_class']` is forwarded to `_load_mean_std_from_file`;
        if it is missing, the 0.5 fallback normalization below is used.

    Returns
    -------
    apply_loader : torch.utils.data.DataLoader
        Non-shuffled dataloader over the dataset.
    num_classes : int
        Number of classes of the dataset, i.e. `len(apply_ds.classes)`.
    """
    # Recover dataset name
    dataset = os.path.basename(os.path.normpath(dataset_folder))
    logging.info('Loading {} from:{}'.format(dataset, dataset_folder))

    # Load the dataset as images
    apply_ds = ImageFolderApply(path=dataset_folder, classify=classify)

    # Loads the analytics csv and extract mean and std
    try:
        mean, std = _load_mean_std_from_file(dataset_folder, inmem, workers, kwargs['runner_class'])
    except (Exception, SystemExit) as exc:
        # Best-effort fallback: any failure to obtain the statistics ends up here. SystemExit is caught on
        # purpose because the helper is not visible from this module and may abort via sys.exit()
        # (TODO confirm); KeyboardInterrupt is deliberately NOT caught so the user can still abort.
        logging.debug('Loading mean/std failed with: {!r}'.format(exc))
        logging.error('analytics.csv not found in folder. Please copy the one generated in the '
                      'training folder to this folder.')
        logging.error('Currently normalizing with 0.5 for all channels for mean and std.')
        mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]

    # Set up dataset transforms
    logging.debug('Setting up dataset transforms')
    # Build the per-image transforms once instead of once per crop
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=mean, std=std)

    if multi_crop is None:
        apply_ds.transform = transforms.Compose([
            transforms.Resize(model_expected_input_size),
            to_tensor,
            normalize,
        ])
    else:
        # MultiCrop yields several crops per image: convert and normalize each one, then stack them
        apply_ds.transform = transforms.Compose([
            MultiCrop(size=model_expected_input_size, n_crops=multi_crop),
            transforms.Lambda(lambda crops: torch.stack([to_tensor(crop) for crop in crops])),
            transforms.Lambda(lambda crops: torch.stack([normalize(crop) for crop in crops])),
        ])

    # Order is preserved (shuffle=False) so that outputs can be matched back to the input files
    apply_loader = torch.utils.data.DataLoader(apply_ds,
                                               shuffle=False,
                                               batch_size=batch_size,
                                               num_workers=workers,
                                               pin_memory=True)

    return apply_loader, len(apply_ds.classes)
#
# def set_up_model(output_channels, model_name, pretrained, optimizer_name, no_cuda, resume, load_model, start_epoch,
#                  train_loader, disable_databalancing, dataset_folder, inmem, workers, num_classes=None, **kwargs):
#     """
#     Instantiate model, optimizer, criterion. Load a pretrained model or resume from a checkpoint.
#
#     Parameters
#     ----------
#     output_channels : int
#         Specify shape of final layer of network. Only used if num_classes is not specified.
#
#     model_name : string
#         Name of the model
#
#     pretrained : bool
#         Specify whether to load a pretrained model or not
#
#     optimizer_name : string
#         Name of the optimizer
#
#     lr: float
#         Value for learning rate
#
#     no_cuda : bool
#         Specify whether to use the GPU or not
#
#     resume : string
#         Path to a saved checkpoint
#
#     load_model : string
#         Path to a saved model
#
#     start_epoch : int
#         Epoch from which to resume training. If not resuming a previous experiment the value is 0
#
#     num_classes: int
#         Number of classes for the model
#
#     kwargs: dict
#         Any additional arguments.
#
#     Returns
#     -------
#     model, criterion, optimizer, best_value, start_epoch
#     """
#
#     # Initialize the model
#     logging.info('Setting up model {}'.format(model_name))
#
#     output_channels = output_channels if num_classes == None else num_classes
#     model = models.__dict__[model_name](output_channels=output_channels, pretrained=pretrained)
#
#     # Get the optimizer created with the specified parameters in kwargs (such as lr, momentum, ... )
#     optimizer = _get_optimizer(optimizer_name, model, **kwargs)
#
#     # Get the criterion
#     if disable_databalancing:
#         criterion = nn.CrossEntropyLoss()
#     else:
#         try:
#             weights = _load_class_frequencies_weights_from_file(dataset_folder, inmem, workers)
#             criterion = nn.CrossEntropyLoss(weight=torch.from_numpy(weights).type(torch.FloatTensor))
#             logging.info('Loading weights for data balancing')
#         except:
#             logging.warning('Unable to load information for data balancing. Using normal criterion')
#             criterion = nn.CrossEntropyLoss()
#
#     # Transfer model to GPU (if desired)
#     if not no_cuda:
#         logging.info('Transfer model to GPU')
#         model = torch.nn.DataParallel(model).cuda()
#         criterion = criterion.cuda()
#         cudnn.benchmark = True
#
#     # Load saved model
#     if load_model:
#         if os.path.isfile(load_model):
#             model_dict = torch.load(load_model)
#             logging.info('Loading a saved model')
#             try:
#                 model.load_state_dict(model_dict['state_dict'], strict=False)
#             except Exception as exp:
#                 logging.warning(exp)
#         else:
#             logging.error("No model dict found at '{}'".format(load_model))
#             sys.exit(-1)
#
#     # Resume from checkpoint
#     if resume:
#         if os.path.isfile(resume):
#             logging.info("Loading checkpoint '{}'".format(resume))
#             checkpoint = torch.load(resume)
#             start_epoch = checkpoint['epoch']
#             best_value = checkpoint['best_value']
#             model.load_state_dict(checkpoint['state_dict'])
#             optimizer.load_state_dict(checkpoint['optimizer'])
#             # val_losses = [checkpoint['val_loss']] #not used?
#             logging.info("Loaded checkpoint '{}' (epoch {})".format(resume, checkpoint['epoch']))
#         else:
#             logging.error("No checkpoint found at '{}'".format(resume))
#             sys.exit(-1)
#     else:
#         best_value = 0.0
#
#     return model, criterion, optimizer, best_value, start_epoch