Skip to content

Instantly share code, notes, and snippets.

Last active May 27, 2016 13:31
Show Gist options
Save plankes-projects/c6c169dd051ba22edbb3e12f0acf1c06 to your computer and use it in GitHub Desktop.
AlexNet configuration for DeepLearning4j
package trainer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LocalResponseNormalization;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.lossfunctions.LossFunctions;
/**
 * Trainer that assembles an AlexNet-style DeepLearning4j network configuration.
 *
 * <p>NOTE(review): this listing appears to have lost text during extraction. Method and
 * class closing braces, the {@code .layer(index, ...)} prefixes on most layer lines, the
 * trailing builder calls of several layers, and the {@code return} statement are not
 * present. Restore them from the original source before compiling.
 */
public class AlexNetTrainer extends AbstractTrainer {
/**
 * Sets the number of training epochs to 5.
 *
 * <p>NOTE(review): {@code epochs} is not declared in this class; presumably it is a field
 * inherited from {@code AbstractTrainer} - confirm. The closing brace of this method is
 * missing from the listing.
 */
protected void init() {
epochs = 5;
/**
 * Builds the multi-layer network configuration.
 *
 * <p>Visible layer order: convolution, local response normalization, pooling,
 * convolution, local response normalization, pooling, three convolutions, pooling,
 * two dense layers ("ffn1", "ffn2"), and an output layer ("output").
 *
 * @param imageWidth   input image width, forwarded to {@code cnnInputSize}
 * @param imageHeight  input image height, forwarded to {@code cnnInputSize}
 * @param channel      number of input channels, forwarded to {@code cnnInputSize}
 * @param numOfClasses not referenced in the visible lines; presumably the output layer's
 *                     {@code nOut} - TODO confirm against the original source
 * @return NOTE(review): no {@code return} statement is visible; presumably the built
 *         configuration - confirm
 */
protected MultiLayerConfiguration buildConfig(int imageWidth, int imageHeight, int channel, int numOfClasses) {
// Seed and iteration count handed to the NeuralNetConfiguration builder below.
int seed = 123;
int iterations = 1;
// NOTE(review): weightInit, activation, updater, lr, mu, l2 and regularization are declared
// but never referenced in the visible lines; presumably they were consumed by builder calls
// that were dropped from this listing - confirm.
WeightInit weightInit = WeightInit.XAVIER;
String activation = "relu";
Updater updater = Updater.NESTEROVS;
double lr = 1e-3;
double mu = 0.9;
double l2 = 5e-4;
boolean regularization = true;
// Pooling type shared by all three subsampling layers.
SubsamplingLayer.PoolingType poolingType = SubsamplingLayer.PoolingType.MAX;
// Bias initial value and dropout setting applied to both dense layers (ffn1, ffn2).
double nonZeroBias = 1;
double dropOut = 0.5;
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
// AlexNet
// Convolution builders below take three int[] arguments; presumably kernel size, stride,
// padding per the DL4J builder signature - verify against the DL4J version in use.
// First convolution: {11,11}, {4,4}, {3,3}.
new ConvolutionLayer.Builder(new int[] { 11, 11 }, new int[] { 4, 4 }, new int[] { 3, 3 })
// Layer index 1: local response normalization "lrn1" with no parameters set explicitly.
.layer(1, new LocalResponseNormalization.Builder().name("lrn1").build())
// Pooling with {3,3} and {2,2} (presumably kernel and stride).
new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 })
// Second convolution: {5,5}, {1,1}, {2,2}.
new ConvolutionLayer.Builder(new int[] { 5, 5 }, new int[] { 1, 1 }, new int[] { 2, 2 })
// Second normalization "lrn2" with explicit k=2, n=5, alpha=1e-4, beta=0.75.
new LocalResponseNormalization.Builder().name("lrn2").k(2).n(5).alpha(1e-4).beta(0.75).build())
new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 })
// Three consecutive convolutions, each {3,3}, {1,1}, {1,1}.
new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 })
// Two dense layers with 4096 outputs each, using nonZeroBias and dropOut.
new DenseLayer.Builder().name("ffn1").nOut(4096).biasInit(nonZeroBias).dropOut(dropOut).build())
new DenseLayer.Builder().name("ffn2").nOut(4096).biasInit(nonZeroBias).dropOut(dropOut).build())
// Output layer using the negative log-likelihood loss function.
new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).name("output")
// Enables backprop, disables pretraining, and declares the CNN input size in the argument
// order (imageHeight, imageWidth, channel).
.backprop(true).pretrain(false).cnnInputSize(imageHeight, imageWidth, channel);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment