% Gist last active: December 13, 2017 07:38
% Creates the Image Database (IMDB) file that can be used for neural network training with MatConvNet.
function [] = createIMDB(trainingImageLocation, dumpSpace)
%createIMDB Build an image database (IMDB) struct from folders of .ppm images.
%   createIMDB(trainingImageLocation, dumpSpace) treats every sub-folder of
%   trainingImageLocation as one image class, reads all .ppm files in it,
%   converts single-channel images to 3 channels, resizes every image to
%   netInputSize, labels it with the index of its class folder and assigns
%   it at random to the training, validation or test set. The resulting
%   struct 'imdb' is saved as a .mat file inside dumpSpace.
%
%   Inputs:
%     trainingImageLocation - folder containing one sub-folder per class
%     dumpSpace             - folder the IMDB .mat file is written to
%                             (created if it does not exist yet)
%
%   Fields of the saved struct:
%     imdb.meta.sets        - {'train','validate','test'}
%     imdb.meta.categories  - class names
%     imdb.images.data      - H-by-W-by-3-by-N single array of image data
%     imdb.images.labels    - 1-by-N single, class index of each image
%     imdb.images.set       - 1-by-N uint8, 1 = train, 2 = validate, 3 = test
%
%   Split applied to every class:
%     70 percent of the data is used for training
%     20 percent for validation
%     10 percent reserved for testing
%   TrainingSet  : to fit the parameters [i.e., weights]
%   ValidationSet: to tune the parameters [i.e., architecture]
%   Testset      : to assess the performance [i.e. predictive power]
%
%   Errors:
%     createIMDB:Input_Argument_Error          - not called with 2 arguments
%     createIMDB2:Traing_Image_Location_Error  - image folder does not exist
%     createIMDB:Class_Size_Error              - a class has fewer than 10 images
%
%   Requires imresize (Image Processing Toolbox).
%
%   Created on: 31st March, 2017
%   Author: Oluwole Oyetoke Jnr
%   Using MATLAB 2016

if nargin ~= 2
    error('createIMDB:Input_Argument_Error','This function works with 2 input argument -trainingImageLocation, dumpSpace- ')
end

%IMDB creation start time. char() is used so the timestamp prints with its
%own Format regardless of how fprintf treats datetime objects.
startTime = datetime('now','Format','dd-MMM-yyyy HH:mm:ss');
fprintf('Start Time: %s\n\n', char(startTime));

%Names used for imdb.meta.categories and for progress messages.
%NOTE(review): only these entries of the list are visible in the reviewed
%source; classes beyond the end of this list fall back to their folder
%name below. Restore the complete list here if specific names are needed.
categories = {'speed_20', 'speed_30','speed_50','speed_60','speed_70',...
    'mandatroy_direction_bypass_obstacle2'};
datasets = {'train', 'validate', 'test'};

%To create an IMDB scaled to a different size, simply change netInputSize
netInputSize = [227 227];
minImagesPerClass = 10;

%Fail early instead of after all images have been processed: dir() returns
%an empty listing (it does not throw) when the folder is missing.
primaryTrainingDataPath = trainingImageLocation;
if exist(primaryTrainingDataPath, 'dir') ~= 7
    error('createIMDB2:Traing_Image_Location_Error','Error Encounterd When Loading Image Data From Folder')
end
if exist(dumpSpace, 'dir') ~= 7
    mkdir(dumpSpace);
end

%Keep only real class folders. '.' and '..' are removed by name because
%their position in the listing is not guaranteed, and plain files in the
%main folder must not shift the class indices.
trainingFolderStruct = dir(primaryTrainingDataPath);
isClassFolder = [trainingFolderStruct.isdir] & ...
    ~ismember({trainingFolderStruct.name}, {'.', '..'});
subFolderList = trainingFolderStruct(isClassFolder);
noOfClasses = numel(subFolderList);

%Any class without a hard-coded name is named after its folder
for classIdx = numel(categories)+1:noOfClasses
    categories{classIdx} = subFolderList(classIdx).name;
end

%First pass: list the .ppm files (portable pixmap format) of every class
%once, validate the class sizes and count the images so that the image
%array can be allocated in one go.
classFiles = cell(1, noOfClasses);
imageCount = 0;
for classIdx = 1:noOfClasses
    listing = dir(fullfile(primaryTrainingDataPath, ...
        subFolderList(classIdx).name, '*.ppm'));
    listing = listing(~[listing.isdir]);
    if numel(listing) < minImagesPerClass
        error('createIMDB:Class_Size_Error', ...
            'Every Class in the dataset should contain at least 10 images')
    end
    classFiles{classIdx} = listing;
    imageCount = imageCount + numel(listing);
end
fprintf('Number of Training Images in Total: %d\n', imageCount);

%Initialize the imdb structure
imdb = struct();
imdb.meta.sets = datasets;
imdb.meta.categories = categories;
%AlexNet uses 227 by 227 by 3 images
imdb.images.data = ones(netInputSize(1), netInputSize(2), 3, imageCount, 'single');
imdb.images.labels = ones(1, imageCount, 'single'); %Image label
%Vector indicating to which set an image belongs (train/validate/test)
imdb.images.set = ones(1, imageCount, 'uint8');
fprintf('Each image will be resized to %d by %d by 3 \n', netInputSize(1), netInputSize(2));

%Second pass: load, resize, label and place every image into a set
imageCounter = 0; %number of images written into imdb so far
for classIdx = 1:noOfClasses
    fprintf('%d. Loading and working on training, validation and test images for '' %s '' traffic sign\n', classIdx, categories{classIdx});
    subFolderStruct = classFiles{classIdx};
    noOfContents2 = numel(subFolderStruct);

    %Images left over after rounding the 20/10 percent shares down all go
    %to the training set, so the three counts always sum to the class size.
    valNo = floor(0.2 * noOfContents2);
    testNo = floor(0.1 * noOfContents2);
    trainNo = noOfContents2 - valNo - testNo;
    fprintf('Out of a total of %d images in this class %d will be used for training, %d for validation and %d for testing\n', noOfContents2, trainNo, valNo, testNo);
    setBank = getSetPositions(trainNo, valNo, testNo);

    for innerLoopCount = 1:noOfContents2
        pathToImage = fullfile(primaryTrainingDataPath, ...
            subFolderList(classIdx).name, subFolderStruct(innerLoopCount).name);
        imageData = imread(pathToImage);

        %The network works with 3 channels: replicate a single channel
        if size(imageData, 3) == 1
            imageData = cat(3, imageData, imageData, imageData);
        end

        %Resize image to the network input size
        if size(imageData, 1) ~= netInputSize(1) || size(imageData, 2) ~= netInputSize(2)
            imageData = imresize(imageData, [netInputSize(1) netInputSize(2)], 'bilinear');
        end

        %Store the three channels plus label and set code
        imageCounter = imageCounter + 1;
        imdb.images.data(:,:,:,imageCounter) = imageData(:,:,1:3);
        imdb.images.labels(1, imageCounter) = classIdx;
        %datasets(1) = Training, datasets(2) = Validate, datasets(3) = Test
        imdb.images.set(1, imageCounter) = setBank(innerLoopCount);
    end %Inner Loop

    percentageCompleted = uint8((imageCounter*100)/imageCount);
    fprintf('%d%% completed so far\n\n', percentageCompleted);
end %Main Loop

%Save IMDB
%NOTE(review): the file name says 32by32 although netInputSize is 227 by
%227; the name is kept unchanged so existing loaders still find the file.
fprintf('Saving IMDB file\n');
filename = fullfile(dumpSpace, 'Traffic_Sign_IMDB(GSTBR)_All_32by32.mat');
imdbInfo = whos('imdb');
if imdbInfo.bytes >= 2^31
    %Variables of 2 GB or more are only written to version 7.3 MAT-files
    save(filename, 'imdb', '-v7.3');
else
    save(filename, 'imdb');
end

%Report end time and the elapsed time as days:hr:min:sec
endTime = datetime('now','Format','dd-MMM-yyyy HH:mm:ss');
fprintf('End Time: %s\n\n', char(endTime));
elapsedDays = datenum(endTime) - datenum(startTime); %difference in days
wholeDays = floor(elapsedDays);
elapsedHrs = datestr(elapsedDays, 'HH');
elapsedMins = datestr(elapsedDays, 'MM');
elapsedSecs = datestr(elapsedDays, 'SS');
fprintf('Overall Time Taken: %d day(s), %s hour(s), %s minute(s), %s second(s) \n\n', wholeDays, elapsedHrs, elapsedMins, elapsedSecs);
%Function used to pick random images for test, validate and train
function allocationSpots = getSetPositions(trainNo, valNo, testNo)
%getSetPositions Randomly distribute set codes over the images of a class.
%   allocationSpots = getSetPositions(trainNo, valNo, testNo) returns a
%   (trainNo+valNo+testNo)-by-1 vector in which exactly trainNo entries are
%   1 (train), valNo entries are 2 (validate) and testNo entries are 3
%   (test). Which positions receive which code is chosen with randperm, so
%   the result depends on the state of the random number generator.
total = trainNo + valNo + testNo;
shuffledPositions = randperm(total);

%Consecutive slices of one permutation are disjoint and together cover
%1:total, so every position receives exactly one code.
allocationSpots = zeros(total, 1);
allocationSpots(shuffledPositions(1:trainNo)) = 1;
allocationSpots(shuffledPositions(trainNo+1:trainNo+valNo)) = 2;
allocationSpots(shuffledPositions(trainNo+valNo+1:total)) = 3;
