# felipecode/cpu_vs_gpu_augmentations.py

## cpu_vs_gpu_augmentations.py
import os
import time
import shutil

import torch
from torchvision import transforms, datasets

import matplotlib.pyplot as plt

""" Performance Test for augmentations using pytorch vision

Example code for testing the performance of GPU versus CPU when performing
image augmentations on pytorch.

To run the code, first download the 'hymenoptera_data' archive
(https://download.pytorch.org/tutorial/hymenoptera_data.zip)
from the PyTorch examples and extract it into the folder the script is run from.

Requires torch 0.4, torchvision 0.4, matplotlib

"""


# DATA READING AND PREPARATION

def listdir_fullpath(d):
    """Return the entries of directory ``d``, each joined onto ``d``.

    The entries are returned in the order ``os.listdir`` yields them.
    """
    full_paths = []
    for entry_name in os.listdir(d):
        full_paths.append(os.path.join(d, entry_name))
    return full_paths


def prepare_data(data_dir):
    """Flatten the 'train' and 'val' folders of ``data_dir`` into 'images'.

    When ``data_dir`` contains a 'train' folder, every entry found in the
    'bees' and 'ants' sub-folders of 'train' and 'val' is moved into
    ``data_dir/images`` and the split folders are then deleted. When there
    is no 'train' folder the function does nothing.

    An already existing 'images' folder is reused, and split or class
    folders that are missing are skipped instead of raising.

    Args:
        data_dir: Path of the dataset root folder.

    Raises:
        ValueError: If ``data_dir`` does not exist.
    """
    if not os.path.exists(data_dir):
        raise ValueError("Directory does not exist.")

    # Nothing to merge unless the dataset still has its 'train' split folder.
    if not os.path.exists(os.path.join(data_dir, 'train')):
        return

    images_dir = os.path.join(data_dir, 'images')
    if not os.path.isdir(images_dir):
        os.mkdir(images_dir)

    # Both splits are handled identically: move the files of each class
    # folder into 'images', then remove the whole split folder.
    for split_name in ('train', 'val'):
        split_dir = os.path.join(data_dir, split_name)
        if not os.path.isdir(split_dir):
            continue
        for class_name in ('bees', 'ants'):
            class_dir = os.path.join(split_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            for entry_name in os.listdir(class_dir):
                shutil.move(os.path.join(class_dir, entry_name), images_dir)
        shutil.rmtree(split_dir)


# The actual code starts here

class ToGPU(object):
    """Callable transform that returns ``img.cuda()`` for the input it is given."""

    def __call__(self, img):
        moved = img.cuda()
        return moved


class Multiply(object):
    """Transform that multiplies a tensor element-wise by a constant factor.

    The factor is built as a one-element float32 tensor on the same device
    as the input and broadcast with ``expand_as`` before multiplying.

    Args:
        multiply: Scalar factor applied to every element.
    """

    def __init__(self, multiply):
        self.multiply = multiply

    def __call__(self, tensor):
        # Allocate the factor on the input's device instead of always on the
        # current CUDA device, so the operands can never end up on
        # different devices.
        factor = torch.tensor([self.multiply], dtype=torch.float32,
                              device=tensor.device)
        return tensor * factor.expand_as(tensor)

    def __repr__(self):
        # `multiply` is the only attribute this class defines.
        return self.__class__.__name__ + '(multiply={0})'.format(self.multiply)


class MultiplyCPU(object):
    """Transform that multiplies a CPU tensor element-wise by a constant factor.

    The factor is built as a one-element ``torch.FloatTensor`` and broadcast
    with ``expand_as`` before multiplying.

    Args:
        multiply: Scalar factor applied to every element.
    """

    def __init__(self, multiply):
        self.multiply = multiply

    def __call__(self, tensor):
        array = torch.FloatTensor([self.multiply]).expand_as(tensor)
        return tensor * array

    def __repr__(self):
        # `multiply` is the only attribute this class defines.
        return self.__class__.__name__ + '(multiply={0})'.format(self.multiply)


if __name__ == '__main__':

    # Benchmark configuration.
    batch_size = 12
    num_workers = 12
    data_path = 'hymenoptera_data'
    # Number of timed passes over the dataset averaged into each measurement.
    repetitions = 3
    # Lengths of the multiplication chains that are benchmarked.
    mult_counts = range(5, 60, 5)

    no_aug_trans = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
    dataset = datasets.ImageFolder(data_path, transform=no_aug_trans)
    # We input this image folder dataset in a dataloader
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                              shuffle=False, num_workers=num_workers,
                                              pin_memory=True)

    # Warm-up pass with no augmentations, just to cache some data.
    for data in data_loader:
        image, labels = data

    gpu_time_vec = []
    cpu_time_vec = []
    loading_time = 0
    for n_mult in mult_counts:

        # Time one pass with no augmentations; these passes are averaged
        # after the loop into the baseline loading time.
        capture_time = time.time()
        for data in data_loader:
            image, labels = data
        loading_time += time.time() - capture_time

        # This is the declaration of the GPU augmenter. It is applied
        # outside the DataLoader since it is not compatible with
        # multiprocessing. The data itself is still loaded through the
        # DataLoader so its workers are used.
        multiply_gpu = transforms.Compose([ToGPU()] + [Multiply(1.01)] * n_mult)
        gpu_time = 0
        for _ in range(repetitions):
            capture_time = time.time()
            for data in data_loader:
                image, labels = data
                result = multiply_gpu(image)
            # CUDA work is queued asynchronously; wait for it to finish so
            # the measured interval really contains the multiplications.
            torch.cuda.synchronize()
            gpu_time += time.time() - capture_time

        gpu_time_vec.append(gpu_time / float(repetitions))
        print("Gpu Time =  ", gpu_time_vec[-1])

        # This is the CPU augmenter, which runs inside the DataLoader.
        aug_trans = transforms.Compose([transforms.Resize((224, 224)),
                                        transforms.ToTensor()] + [MultiplyCPU(1.01)] * n_mult)

        dataset_aug = datasets.ImageFolder(data_path, transform=aug_trans)
        data_loader_aug = torch.utils.data.DataLoader(dataset_aug, batch_size=batch_size,
                                                      shuffle=False, num_workers=num_workers,
                                                      pin_memory=True)
        cpu_time = 0
        for _ in range(repetitions):
            capture_time = time.time()
            for data in data_loader_aug:
                image, labels = data
            cpu_time += time.time() - capture_time

        cpu_time_vec.append(cpu_time / float(repetitions))
        print("CPU Time =  ", cpu_time_vec[-1])

    # Average plain-loading time, subtracted so that only the cost of the
    # augmentations is left in the plotted values.
    loading_time = loading_time / len(mult_counts)

    cpu_time_vec = [time_cpu - loading_time for time_cpu in cpu_time_vec]
    gpu_time_vec = [time_gpu - loading_time for time_gpu in gpu_time_vec]

    fig, ax = plt.subplots()
    ax.plot(mult_counts, gpu_time_vec, label='GPU')
    ax.plot(mult_counts, cpu_time_vec, label='CPU')
    ax.set_xlabel('Number of Multiplications')
    ax.set_ylabel('Time (seconds)')
    # Label the two curves so they can be told apart.
    ax.legend()

    plt.show()
	import os
	import time
	import shutil

	import torch
	from torchvision import transforms, datasets

	import matplotlib.pyplot as plt

	""" Performance Test for augmentations using pytorch vision

	Example code for testing the performance of GPU versus CPU when performing
	image augmentations on pytorch.

	To run the code, first download the 'hymenoptera_data' archive
	(https://download.pytorch.org/tutorial/hymenoptera_data.zip)
	from the PyTorch examples and extract it into the folder the script is run from.

	Requires torch 0.4, torchvision 0.4, matplotlib

	"""


	# DATA READING AND PREPARATION

	def listdir_fullpath(d):
	    """Return the entries of directory ``d``, each joined onto ``d``.

	    The entries are returned in the order ``os.listdir`` yields them.
	    """
	    return [os.path.join(d, f) for f in os.listdir(d)]


	def prepare_data(data_dir):
	    """Flatten the 'train' and 'val' folders of ``data_dir`` into 'images'.

	    When ``data_dir`` contains a 'train' folder, every entry found in the
	    'bees' and 'ants' sub-folders of 'train' and 'val' is moved into
	    ``data_dir/images`` and the split folders are then deleted. When there
	    is no 'train' folder the function does nothing.

	    An already existing 'images' folder is reused, and split or class
	    folders that are missing are skipped instead of raising.

	    Args:
	        data_dir: Path of the dataset root folder.

	    Raises:
	        ValueError: If ``data_dir`` does not exist.
	    """
	    if not os.path.exists(data_dir):
	        raise ValueError("Directory does not exist.")

	    # Nothing to merge unless the dataset still has its 'train' split folder.
	    if not os.path.exists(os.path.join(data_dir, 'train')):
	        return

	    images_dir = os.path.join(data_dir, 'images')
	    if not os.path.isdir(images_dir):
	        os.mkdir(images_dir)

	    # Both splits are handled identically: move the files of each class
	    # folder into 'images', then remove the whole split folder.
	    for split_name in ('train', 'val'):
	        split_dir = os.path.join(data_dir, split_name)
	        if not os.path.isdir(split_dir):
	            continue
	        for class_name in ('bees', 'ants'):
	            class_dir = os.path.join(split_dir, class_name)
	            if not os.path.isdir(class_dir):
	                continue
	            for entry_name in os.listdir(class_dir):
	                shutil.move(os.path.join(class_dir, entry_name), images_dir)
	        shutil.rmtree(split_dir)


	# The actual code starts here

	class ToGPU(object):
	    """Callable transform that returns ``img.cuda()`` for the input it is given."""

	    def __call__(self, img):
	        return img.cuda()


	class Multiply(object):
	    """Transform that multiplies a tensor element-wise by a constant factor.

	    The factor is built as a one-element float32 tensor on the same device
	    as the input and broadcast with ``expand_as`` before multiplying.

	    Args:
	        multiply: Scalar factor applied to every element.
	    """

	    def __init__(self, multiply):
	        self.multiply = multiply

	    def __call__(self, tensor):
	        # Allocate the factor on the input's device instead of always on the
	        # current CUDA device, so the operands can never end up on
	        # different devices.
	        factor = torch.tensor([self.multiply], dtype=torch.float32,
	                              device=tensor.device)
	        return tensor * factor.expand_as(tensor)

	    def __repr__(self):
	        # `multiply` is the only attribute this class defines.
	        return self.__class__.__name__ + '(multiply={0})'.format(self.multiply)


	class MultiplyCPU(object):
	    """Transform that multiplies a CPU tensor element-wise by a constant factor.

	    The factor is built as a one-element ``torch.FloatTensor`` and broadcast
	    with ``expand_as`` before multiplying.

	    Args:
	        multiply: Scalar factor applied to every element.
	    """

	    def __init__(self, multiply):
	        self.multiply = multiply

	    def __call__(self, tensor):
	        array = torch.FloatTensor([self.multiply]).expand_as(tensor)
	        return tensor * array

	    def __repr__(self):
	        # `multiply` is the only attribute this class defines.
	        return self.__class__.__name__ + '(multiply={0})'.format(self.multiply)


	if __name__ == '__main__':

	    # Benchmark configuration.
	    batch_size = 12
	    num_workers = 12
	    data_path = 'hymenoptera_data'
	    # Number of timed passes over the dataset averaged into each measurement.
	    repetitions = 3
	    # Lengths of the multiplication chains that are benchmarked.
	    mult_counts = range(5, 60, 5)

	    no_aug_trans = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
	    dataset = datasets.ImageFolder(data_path, transform=no_aug_trans)
	    # We input this image folder dataset in a dataloader
	    data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
	                                              shuffle=False, num_workers=num_workers,
	                                              pin_memory=True)

	    # Warm-up pass with no augmentations, just to cache some data.
	    for data in data_loader:
	        image, labels = data

	    gpu_time_vec = []
	    cpu_time_vec = []
	    loading_time = 0
	    for n_mult in mult_counts:

	        # Time one pass with no augmentations; these passes are averaged
	        # after the loop into the baseline loading time.
	        capture_time = time.time()
	        for data in data_loader:
	            image, labels = data
	        loading_time += time.time() - capture_time

	        # This is the declaration of the GPU augmenter. It is applied
	        # outside the DataLoader since it is not compatible with
	        # multiprocessing. The data itself is still loaded through the
	        # DataLoader so its workers are used.
	        multiply_gpu = transforms.Compose([ToGPU()] + [Multiply(1.01)] * n_mult)
	        gpu_time = 0
	        for _ in range(repetitions):
	            capture_time = time.time()
	            for data in data_loader:
	                image, labels = data
	                result = multiply_gpu(image)
	            # CUDA work is queued asynchronously; wait for it to finish so
	            # the measured interval really contains the multiplications.
	            torch.cuda.synchronize()
	            gpu_time += time.time() - capture_time

	        gpu_time_vec.append(gpu_time / float(repetitions))
	        print("Gpu Time = ", gpu_time_vec[-1])

	        # This is the CPU augmenter, which runs inside the DataLoader.
	        aug_trans = transforms.Compose([transforms.Resize((224, 224)),
	                                        transforms.ToTensor()] + [MultiplyCPU(1.01)] * n_mult)

	        dataset_aug = datasets.ImageFolder(data_path, transform=aug_trans)
	        data_loader_aug = torch.utils.data.DataLoader(dataset_aug, batch_size=batch_size,
	                                                      shuffle=False, num_workers=num_workers,
	                                                      pin_memory=True)
	        cpu_time = 0
	        for _ in range(repetitions):
	            capture_time = time.time()
	            for data in data_loader_aug:
	                image, labels = data
	            cpu_time += time.time() - capture_time

	        cpu_time_vec.append(cpu_time / float(repetitions))
	        print("CPU Time = ", cpu_time_vec[-1])

	    # Average plain-loading time, subtracted so that only the cost of the
	    # augmentations is left in the plotted values.
	    loading_time = loading_time / len(mult_counts)

	    cpu_time_vec = [time_cpu - loading_time for time_cpu in cpu_time_vec]
	    gpu_time_vec = [time_gpu - loading_time for time_gpu in gpu_time_vec]

	    fig, ax = plt.subplots()
	    ax.plot(mult_counts, gpu_time_vec, label='GPU')
	    ax.plot(mult_counts, cpu_time_vec, label='CPU')
	    ax.set_xlabel('Number of Multiplications')
	    ax.set_ylabel('Time (seconds)')
	    # Label the two curves so they can be told apart.
	    ax.legend()

	    plt.show()