@plankes-projects
Last active May 27, 2016 13:31
AlexNet configuration for DeepLearning4j
package trainer;

import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LocalResponseNormalization;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.lossfunctions.LossFunctions;

/**
 * AlexNet configuration for DeepLearning4j, following the architecture from
 * Krizhevsky et al., "ImageNet Classification with Deep Convolutional Neural Networks" (2012).
 */
public class AlexNetTrainer extends AbstractTrainer {
    @Override
    protected void init() {
        epochs = 5;
    }
    @Override
    protected MultiLayerConfiguration buildConfig(int imageWidth, int imageHeight, int channel, int numOfClasses) {
        // Training hyperparameters
        int seed = 123;
        int iterations = 1;
        WeightInit weightInit = WeightInit.XAVIER;
        String activation = "relu";
        Updater updater = Updater.NESTEROVS;
        double lr = 1e-3;          // learning rate
        double mu = 0.9;           // momentum
        double l2 = 5e-4;          // L2 regularization strength
        boolean regularization = true;
        SubsamplingLayer.PoolingType poolingType = SubsamplingLayer.PoolingType.MAX;
        double nonZeroBias = 1;    // bias init for selected conv/dense layers
        double dropOut = 0.5;      // dropout on the fully connected layers
        MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                .seed(seed).iterations(iterations)
                .activation(activation).weightInit(weightInit)
                .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(lr).momentum(mu)
                .regularization(regularization).l2(l2).updater(updater).useDropConnect(true)
                // AlexNet: five convolutional layers (with LRN and max pooling),
                // two fully connected layers, and a softmax output layer
                .list()
                .layer(0,
                        new ConvolutionLayer.Builder(new int[] { 11, 11 }, new int[] { 4, 4 }, new int[] { 3, 3 })
                                .name("cnn1").nIn(channel).nOut(96).build())
                .layer(1, new LocalResponseNormalization.Builder().name("lrn1").build())
                .layer(2,
                        new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 })
                                .name("maxpool1").build())
                .layer(3,
                        new ConvolutionLayer.Builder(new int[] { 5, 5 }, new int[] { 1, 1 }, new int[] { 2, 2 })
                                .name("cnn2").nOut(256).biasInit(nonZeroBias).build())
                .layer(4,
                        new LocalResponseNormalization.Builder().name("lrn2").k(2).n(5).alpha(1e-4).beta(0.75).build())
                .layer(5,
                        new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 })
                                .name("maxpool2").build())
                .layer(6,
                        new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
                                .name("cnn3").nOut(384).build())
                .layer(7,
                        new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
                                .name("cnn4").nOut(384).biasInit(nonZeroBias).build())
                .layer(8,
                        new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
                                .name("cnn5").nOut(256).biasInit(nonZeroBias).build())
                .layer(9,
                        new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 })
                                .name("maxpool3").build())
                .layer(10,
                        new DenseLayer.Builder().name("ffn1").nOut(4096).biasInit(nonZeroBias).dropOut(dropOut).build())
                .layer(11,
                        new DenseLayer.Builder().name("ffn2").nOut(4096).biasInit(nonZeroBias).dropOut(dropOut).build())
                .layer(12,
                        new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).name("output")
                                .nOut(numOfClasses).activation("softmax").build())
                .backprop(true).pretrain(false)
                .cnnInputSize(imageHeight, imageWidth, channel);
        return builder.build();
    }
}
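The class above only builds the configuration; below is a minimal usage sketch, not part of the original gist. It assumes the DL4J 0.4.x API used in the gist, a 224x224 RGB input (the standard AlexNet input size; the gist leaves the actual size to the caller), and a placeholder DataSetIterator named trainData. AbstractTrainer is not included in the gist, so buildConfig is called directly, which only compiles here because the sketch sits in the same trainer package as the protected method.

package trainer;

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;

public class AlexNetUsageSketch {
    // Builds the network from the configuration above and trains it for a few epochs.
    // "trainData" is a placeholder; wire up your own data pipeline here.
    public static void run(DataSetIterator trainData, int numOfClasses) {
        AlexNetTrainer trainer = new AlexNetTrainer();
        // Assumed 224x224 RGB input; same-package access to the protected buildConfig.
        MultiLayerConfiguration conf = trainer.buildConfig(224, 224, 3, numOfClasses);
        MultiLayerNetwork network = new MultiLayerNetwork(conf);
        network.init();
        network.setListeners(new ScoreIterationListener(1)); // print the score every iteration
        for (int epoch = 0; epoch < 5; epoch++) { // 5 epochs, matching init() above
            network.fit(trainData);
            trainData.reset();
        }
    }
}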