Caffe Python Data Augmentation Layer
import caffe
import cv2
import numpy as np
import sys
import re
import random
from Queue import Queue
from threading import Thread
from multiprocessing import cpu_count
###############################################################################
# Selectors: #
# For each image, the selector's method `select` is invoked. Its `numFilters` #
# parameter indicates the number of filters that are currently available and #
# it must return a list of integers, containing the 0-based indexes of the #
# filters that are going to be applied to an image. #
###############################################################################
# Uniform selector:
# Its `probability` parameter defines the probability of filters being applied
# to an image. E.g.: if probability is 0.8, there is an 80% chance of applying
# filters to an image. The set of filters to be applied is selected at random.
class Uniform():
    def __init__(self, probability):
        self.probability = float(probability)

    def select(self, numFilters):
        if random.random() < self.probability:
            numSelected = random.randint(1, numFilters)
            filters = random.sample(range(numFilters), numSelected)
            return filters
        else:
            return []
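# Example (hypothetical usage, not part of the layer): a Uniform(0.8) selector
# picks a non-empty random subset of the available filters 80% of the time and
# skips the image otherwise:
#   selector = Uniform(0.8)
#   selector.select(5)  #e.g. [3, 0, 4]; returns [] with 20% probability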
###############################################################################
# Filters: #
# The filter's `apply` method is invoked for every image that this filter is #
# going to process. The image received as a parameter is a numpy array shaped #
# like an OpenCV image (axis 0 is height, 1 is width and 2 is the channel). #
###############################################################################
# Flip:
# Flips an image vertically or horizontally. The constructor parameter
# `flipMode` is a string containing either 'h' for horizontal flip or 'v' for
# vertical flip.
class Flip():
    def __init__(self, flipMode):
        if flipMode == 'v':
            self.flipMode = 0
        elif flipMode == 'h':
            self.flipMode = 1
        else:
            raise Exception('Invalid flip mode')

    def apply(self, cvImg):
        return cv2.flip(cvImg, self.flipMode)
# Rotate:
# Rotates an image by an arbitrary number of degrees. The `degrees` parameter
# defines the rotation angle (counterclockwise) and the optional parameter
# `fillBackground`, when set to True, uses the upper-left pixel of the original
# image to fill the areas created by the rotation. By default, those areas are
# filled with black pixels. For square images, rotations that are multiples of
# 90 degrees do not produce such areas.
class Rotate():
    def __init__(self, degrees, fillBackground = False):
        self.degrees = degrees
        self.fillBackground = fillBackground
        self.rotMat = None

    def setup(self, imgSize):
        center = (imgSize[0]/2, imgSize[1]/2)
        self.rotMat = cv2.getRotationMatrix2D(center, self.degrees, 1.0)
        self.imgSize = (imgSize[0], imgSize[1])

    def apply(self, cvImg):
        backgroundColor = (0,0,0)
        if self.fillBackground:
            backgroundColor = tuple(cvImg[0,0,:].astype(int))
        if self.rotMat is None:
            self.imgSize = (cvImg.shape[1], cvImg.shape[0])
            center = (self.imgSize[0]/2, self.imgSize[1]/2)
            self.rotMat = cv2.getRotationMatrix2D(center, self.degrees, 1.0)
        return cv2.warpAffine(cvImg, self.rotMat, self.imgSize, flags=cv2.INTER_LANCZOS4, borderMode=cv2.BORDER_CONSTANT, borderValue=backgroundColor)
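# Note on the caching pattern above (Scale below uses it too): the transform
# matrix is computed lazily on the first call to `apply` and reused for every
# subsequent image, which assumes all images in the dataset share the same
# dimensions. A hypothetical usage sketch:
#   rot = Rotate(30, fillBackground=True)
#   rotated = rot.apply(img)  #img is an HxWxC numpy array (OpenCV layout)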
# Scale:
# Scales an image by a factor. The `factor` parameter indicates how much the image
# must be scaled. A number larger than 1 enlarges the image (1.1 = 10% bigger). A
# number between 0 and 1 shrinks it (0.9 = 10% smaller). The `fillBackground`
# parameter works the same way as on the Rotate filter.
class Scale():
    def __init__(self, factor, fillBackground = False):
        self.factor = factor
        self.fillBackground = fillBackground
        self.transfMat = None

    def apply(self, cvImg):
        backgroundColor = (0,0,0)
        if self.fillBackground:
            backgroundColor = tuple(cvImg[0,0,:].astype(int))
        if self.transfMat is None:
            self.imgSize = (cvImg.shape[1], cvImg.shape[0])
            center = (self.imgSize[0]/2, self.imgSize[1]/2)
            self.transfMat = cv2.getRotationMatrix2D(center, 0, self.factor)
        return cv2.warpAffine(cvImg, self.transfMat, self.imgSize, flags=cv2.INTER_LANCZOS4, borderMode=cv2.BORDER_CONSTANT, borderValue=backgroundColor)
# Brightness:
# Increases or decreases brightness of the image. The value `beta` is added to the
# three color channels. If `floodFill` is True, after the brightness change,
# we apply a floodfill using the original value of the upper-left pixel to keep the
# background color unchanged.
class Brightness():
    def __init__(self, beta, floodFill = False):
        self.beta = beta
        self.floodFill = floodFill

    def apply(self, cvImg):
        backgroundColor = tuple(cvImg[0,0,:].astype(int))
        processed = cv2.add(cvImg, (self.beta, self.beta, self.beta, 0))
        if self.floodFill:
            cv2.floodFill(processed, None, (0,0), backgroundColor)
        return processed
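# cv2.add performs saturating arithmetic on uint8 data, so values clamp at 0
# and 255 instead of wrapping around. E.g. for a pixel value of 250 and
# beta = 30, the result is 255, not the 24 that plain numpy addition would
# give. The same holds for cv2.multiply in the Contrast filter below.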
# Contrast:
# Increases or decreases the contrast of the image. All values in the image
# are multiplied by the `alpha` parameter. If 0 < alpha < 1, the contrast is
# decreased; if alpha > 1, it is increased. If `floodFill` is True, after the
# contrast change we apply a floodfill using the original value of the
# upper-left pixel to keep the background color unchanged.
class Contrast():
    def __init__(self, alpha, floodFill = False):
        self.alpha = alpha
        self.floodFill = floodFill

    def apply(self, cvImg):
        backgroundColor = tuple(cvImg[0,0,:].astype(int))
        processed = cv2.multiply(cvImg, (self.alpha, self.alpha, self.alpha, 0))
        if self.floodFill:
            cv2.floodFill(processed, None, (0,0), backgroundColor)
        return processed
# Sharpen:
# Sharpens the image using unsharp masking.
class Sharpen():
    def apply(self, cvImg):
        processed = cv2.GaussianBlur(cvImg, (5,5), 0)
        return cv2.addWeighted(cvImg, 1.5, processed, -0.5, 0)
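# The weights above implement unsharp masking:
#   sharpened = 1.5*img - 0.5*blur = img + 0.5*(img - blur)
# i.e. the original image plus half of its high-frequency residual.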
# This method is executed by each one of the image processing threads.
# It allocates the selector and filters and watches a queue for images
# coming from the main thread.
def workerThread(queue, config):
    #Parse configuration string:
    parsedConfig = eval('{'+config+'}')

    #Make sure the configurations are here:
    if 'selector' not in parsedConfig:
        raise Exception('Selector configuration missing')
    if 'filters' not in parsedConfig:
        raise Exception('Filter configuration missing')

    selector = parsedConfig['selector']
    filters = parsedConfig['filters']
    numFilters = len(filters)

    #Infinite loop:
    while True:
        #Get image from queue:
        (bottom, top, index) = queue.get()

        #Convert to the OpenCV shape:
        caffeIn = bottom[0].data[index,...]
        cvIn = np.transpose(caffeIn, (1,2,0))

        #Select filters and invoke them:
        for fPos in selector.select(numFilters):
            f = filters[fPos]
            cvIn = f.apply(cvIn)

        #Convert back to the Caffe format:
        caffeOut = np.transpose(cvIn, (2,0,1))
        top[0].data[index,:] = caffeOut

        #Complete task:
        queue.task_done()
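# The `config` string is the layer's `param_str`, evaluated as the body of a
# Python dict literal, so it must look like (see the prototxt below):
#   "selector": Uniform(0.8), "filters": [Flip('h'), Rotate(30, True)]
# Since eval runs arbitrary Python, the prototxt must come from a trusted
# source.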
# The Augmentation Layer applies transformations to the images
# on the fly. This effectively increases the size of the dataset
# and helps reduce overfitting.
class AugmentationLayer(caffe.Layer):
    def setup(self, bottom, top):
        config = self.param_str
        self.batchSize = bottom[0].data.shape[0]

        #Create worker threads:
        self.queue = Queue(maxsize=0)
        for i in range(cpu_count()):
            thread = Thread(target=workerThread, args=(self.queue, config))
            thread.setDaemon(True)
            thread.start()

    def reshape(self, bottom, top):
        #The output of this layer has the same size as the input.
        #(Maybe this could be changed in the future?)
        top[0].reshape(*bottom[0].data.shape)

    def forward(self, bottom, top):
        #Forward pass of the network. We put every image of the batch
        #on the queue and wait for the worker threads to do their job.
        for i in range(self.batchSize):
            self.queue.put((bottom, top, i))
        self.queue.join()

    def backward(self, top, propagate_down, bottom):
        #Backward pass of the network. Nothing to do here.
        #(Caffe's Python layer API passes (top, propagate_down, bottom).)
        pass
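
# Minimal smoke test (hypothetical, not part of the original layer): exercises
# the filters directly on a random OpenCV-style image, without Caffe.
if __name__ == '__main__':
    img = np.random.randint(0, 256, (64, 64, 3)).astype(np.uint8)
    for f in [Flip('h'), Rotate(30), Scale(0.9), Brightness(30), Contrast(1.1), Sharpen()]:
        img = f.apply(img)
    print 'Output shape:', img.shape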
# LeNet
name: "LeNet"
layer {
  name: "train-data"
  type: "Data"
  top: "data"
  top: "label"
  data_param {
    batch_size: 64
  }
  include { stage: "train" }
}
layer {
  name: "val-data"
  type: "Data"
  top: "data"
  top: "label"
  data_param {
    batch_size: 32
  }
  include { stage: "val" }
}
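# The Python augmentation layer runs only in the TRAIN phase. Its param_str is
# evaluated by the layer code as the body of a Python dict literal, so the
# selector and the full filter list are configured right here in the prototxt.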
layer {
  name: "augmentation"
  type: "Python"
  bottom: "data"
  top: "aug"
  include {
    phase: TRAIN
  }
  python_param {
    module: "digits_python_layers"
    layer: "AugmentationLayer"
    param_str: "\"selector\": Uniform(0.8), \"filters\": [Scale(1.1),Scale(0.9, True),Rotate(30,True),Rotate(60,True),Rotate(90,True),Flip(\"v\"),Flip(\"h\"),Brightness(30),Brightness(-30,True),Contrast(0.9,True),Contrast(1.1),Sharpen()]"
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "aug"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  include {
    phase: TRAIN
  }
  convolution_param {
    num_output: 32
    kernel_size: 11
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
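# conv1 is declared twice on purpose: the TRAIN-phase copy above reads the
# augmented blob "aug", while the copy below (excluded from TRAIN) reads
# "data" directly. Because both share the name "conv1", the test/deploy net
# reuses the weights learned during training.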
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  exclude {
    phase: TRAIN
  }
  convolution_param {
    num_output: 32
    kernel_size: 11
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "conv1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 32
    kernel_size: 11
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool1"
  top: "conv3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 64
    kernel_size: 6
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 64
    kernel_size: 6
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv4"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 512
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "drop1"
  type: "Dropout"
  bottom: "ip1"
  top: "ip1"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    # Since num_output is unset, DIGITS will automatically set it to the
    # number of classes in your dataset.
    # Uncomment this line to set it explicitly:
    #num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include { stage: "val" }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
  exclude { stage: "deploy" }
}
layer {
  name: "softmax"
  type: "Softmax"
  bottom: "ip2"
  top: "softmax"
  include { stage: "deploy" }
}