Skip to content

Instantly share code, notes, and snippets.

@dan-lind
Created October 4, 2016 13:00
Show Gist options
  • Save dan-lind/248db1cca2d6e1226fcc85928c4a68de to your computer and use it in GitHub Desktop.
Save dan-lind/248db1cca2d6e1226fcc85928c4a68de to your computer and use it in GitHub Desktop.
import org.datavec.api.io.filters.BalancedPathFilter;
import org.datavec.api.io.labels.ParentPathLabelGenerator;
import org.datavec.api.split.FileSplit;
import org.datavec.api.split.InputSplit;
import org.datavec.image.loader.BaseImageLoader;
import org.datavec.image.recordreader.ImageRecordReader;
import org.datavec.image.transform.ImageTransform;
import org.datavec.image.transform.MultiImageTransform;
import org.datavec.image.transform.ResizeImageTransform;
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.deeplearning4j.ui.weights.HistogramIterationListener;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.Random;
/**
* Created by susaneraly on 6/9/16.
*/
public class ImagePipelineExample {
protected static final Logger log = LoggerFactory.getLogger(ImagePipelineExample.class);
//Images are of format given by allowedExtension -
protected static final String [] allowedExtensions = BaseImageLoader.ALLOWED_FORMATS;
protected static final long seed = 12345;
public static final Random randNumGen = new Random(seed);
protected static int height = 100;
protected static int width = 100;
protected static int channels = 3;
protected static int numExamples = 80;
protected static int outputNum = 5;
public static void main(String[] args) throws Exception {
//DIRECTORY STRUCTURE:
//Images in the dataset have to be organized in directories by class/label.
//In this example there are ten images in three classes
//Here is the directory structure
// parentDir
// / | \
// / | \
// labelA labelB labelC
//
//Set your data up like this so that labels from each label/class live in their own directory
//And these label/class directories live together in the parent directory
//
//
File parentDir = new File("/Users/danlin/Documents/parent/");
//Files in directories under the parent dir that have "allowed extensions" plit needs a random number generator for reproducibility when splitting the files into train and test
FileSplit filesInDir = new FileSplit(parentDir, allowedExtensions, randNumGen);
//You do not have to manually specify labels. This class (instantiated as below) will
//parse the parent dir and use the name of the subdirectories as label/class names
ParentPathLabelGenerator labelMaker = new ParentPathLabelGenerator();
//The balanced path filter gives you fine tune control of the min/max cases to load for each class
//Below is a bare bones version. Refer to javadocs for details
BalancedPathFilter pathFilter = new BalancedPathFilter(randNumGen, allowedExtensions, labelMaker);
//Split the image files into train and test. Specify the train test split as 80%,20%
InputSplit[] filesInDirSplit = filesInDir.sample(pathFilter, 80, 20);
InputSplit trainData = filesInDirSplit[0];
InputSplit testData = filesInDirSplit[1];
//Specifying a new record reader with the height and width you want the images to be resized to.
//Note that the images in this example are all of different size
//They will all be resized to the height and width specified below
ImageRecordReader recordReader = new ImageRecordReader(height,width,channels,labelMaker);
ImageRecordReader recordReaderTest = new ImageRecordReader(height,width,channels,labelMaker);
//Often there is a need to transforming images to artificially increase the size of the dataset
//DataVec has built in powerful features from OpenCV
//You can chain transformations as shown below, write your own classes that will say detect a face and crop to size
/*ImageTransform transform = new MultiImageTransform(randNumGen,
new CropImageTransform(10), new FlipImageTransform(),
new ScaleImageTransform(10), new WarpImageTransform(10));
*/
//You can use the ShowImageTransform to view your images
//Code below gives you a look before and after, for a side by side comparison
ImageTransform transform = new MultiImageTransform(randNumGen, new ResizeImageTransform(100,100));
ImageTransform transformTest = new MultiImageTransform(randNumGen, new ResizeImageTransform(100,100));
log.info("Load data....");
//Initialize the record reader with the train data and the transform chain
recordReader.initialize(trainData,transform);
//convert the record reader to an iterator for training - Refer to other examples for how to use an iterator
DataSetIterator customTrain = new RecordReaderDataSetIterator(recordReader, 10, 1, outputNum);
recordReaderTest.initialize(testData,transformTest);
//convert the record reader to an iterator for training - Refer to other examples for how to use an iterator
DataSetIterator customTest = new RecordReaderDataSetIterator(recordReaderTest, 10, 1, outputNum);
int nChannels = 3;
int outputNum = 5;
int batchSize = 64;
int nEpochs = 10;
int iterations = 1;
int seed = 123;
log.info("Build model....");
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.seed(seed)
.iterations(iterations)
.regularization(true).l2(0.0005)
.learningRate(0.01)//.biasLearningRate(0.02)
//.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75)
.weightInit(WeightInit.XAVIER)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.updater(Updater.NESTEROVS).momentum(0.9)
.list()
.layer(0, new ConvolutionLayer.Builder(5, 5)
//nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
.nIn(nChannels)
.stride(1, 1)
.nOut(20)
.activation("identity")
.build())
.layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
.kernelSize(2,2)
.stride(2,2)
.build())
.layer(2, new ConvolutionLayer.Builder(5, 5)
//Note that nIn need not be specified in later layers
.stride(1, 1)
.nOut(50)
.activation("identity")
.build())
.layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
.kernelSize(2,2)
.stride(2,2)
.build())
.layer(4, new DenseLayer.Builder().activation("relu")
.nOut(500).build())
.layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
.nOut(outputNum)
.activation("softmax")
.build())
.setInputType(InputType.convolutional(100,100,3)) //See note below
.backprop(true).pretrain(false);
/*
Regarding the .setInputType(InputType.convolutionalFlat(28,28,1)) line: This does a few things.
(a) It adds preprocessors, which handle things like the transition between the convolutional/subsampling layers
and the dense layers
(b) Does some additional configuration validation
(c) Where necessary, sets the nIn (number of input neurons, or input depth in the case of CNNs) values for each
layer based on the size of the previous layer (but it won't override values manually set by the user)
In earlier versions of DL4J, the (now deprecated) ConvolutionLayerSetup class was used instead for this.
InputTypes can be used with other layer types too (RNNs, MLPs etc) not just CNNs.
For normal images (when using ImageRecordReader) use InputType.convolutional(height,width,depth).
MNIST record reader is a special case, that outputs 28x28 pixel grayscale (nChannels=1) images, in a "flattened"
row vector format (i.e., 1x784 vectors), hence the "convolutionalFlat" input type used here.
*/
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork model = new MultiLayerNetwork(conf);
model.init();
log.info("Train model....");
model.setListeners(new ScoreIterationListener(1), new HistogramIterationListener(1));
for( int i=0; i<nEpochs; i++ ) {
model.fit(customTrain);
log.info("*** Completed epoch {} ***", i);
log.info("Evaluate model....");
Evaluation eval = new Evaluation(outputNum);
while(customTest.hasNext()){
DataSet ds = customTest.next();
INDArray output = model.output(ds.getFeatureMatrix(), false);
eval.eval(ds.getLabels(), output);
}
log.info(eval.stats());
customTest.reset();
}
log.info("****************Example finished********************");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment