@plankes-projects
Last active May 27, 2016 13:31
AlexNet configuration for DeepLearning4j
package trainer;

import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LocalResponseNormalization;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.lossfunctions.LossFunctions;

/**
 * AlexNet configuration for DeepLearning4j, following the architecture from
 * Krizhevsky et al., "ImageNet Classification with Deep Convolutional Neural Networks" (2012).
 */
public class AlexNetTrainer extends AbstractTrainer {
    @Override
    protected void init() {
        epochs = 5;
    }
    @Override
    protected MultiLayerConfiguration buildConfig(int imageWidth, int imageHeight, int channel, int numOfClasses) {
        // Training hyperparameters
        int seed = 123;
        int iterations = 1;
        WeightInit weightInit = WeightInit.XAVIER;
        String activation = "relu";
        Updater updater = Updater.NESTEROVS;
        double lr = 1e-3;          // learning rate
        double mu = 0.9;           // momentum
        double l2 = 5e-4;          // L2 regularization strength
        boolean regularization = true;
        SubsamplingLayer.PoolingType poolingType = SubsamplingLayer.PoolingType.MAX;
        double nonZeroBias = 1;    // bias init for selected conv/dense layers
        double dropOut = 0.5;      // dropout on the fully connected layers
        MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                .seed(seed).iterations(iterations)
                .activation(activation).weightInit(weightInit)
                .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(lr).momentum(mu)
                .regularization(regularization).l2(l2).updater(updater).useDropConnect(true)
                // AlexNet: five convolutional layers (with LRN and max pooling),
                // two fully connected layers, and a softmax output layer
                .list()
                .layer(0,
                        new ConvolutionLayer.Builder(new int[] { 11, 11 }, new int[] { 4, 4 }, new int[] { 3, 3 })
                                .name("cnn1").nIn(channel).nOut(96).build())
                .layer(1, new LocalResponseNormalization.Builder().name("lrn1").build())
                .layer(2,
                        new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 })
                                .name("maxpool1").build())
                .layer(3,
                        new ConvolutionLayer.Builder(new int[] { 5, 5 }, new int[] { 1, 1 }, new int[] { 2, 2 })
                                .name("cnn2").nOut(256).biasInit(nonZeroBias).build())
                .layer(4,
                        new LocalResponseNormalization.Builder().name("lrn2").k(2).n(5).alpha(1e-4).beta(0.75).build())
                .layer(5,
                        new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 })
                                .name("maxpool2").build())
                .layer(6,
                        new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
                                .name("cnn3").nOut(384).build())
                .layer(7,
                        new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
                                .name("cnn4").nOut(384).biasInit(nonZeroBias).build())
                .layer(8,
                        new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
                                .name("cnn5").nOut(256).biasInit(nonZeroBias).build())
                .layer(9,
                        new SubsamplingLayer.Builder(poolingType, new int[] { 3, 3 }, new int[] { 2, 2 })
                                .name("maxpool3").build())
                .layer(10,
                        new DenseLayer.Builder().name("ffn1").nOut(4096).biasInit(nonZeroBias).dropOut(dropOut).build())
                .layer(11,
                        new DenseLayer.Builder().name("ffn2").nOut(4096).biasInit(nonZeroBias).dropOut(dropOut).build())
                .layer(12,
                        new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).name("output")
                                .nOut(numOfClasses).activation("softmax").build())
                .backprop(true).pretrain(false)
                .cnnInputSize(imageHeight, imageWidth, channel);
        return builder.build();
    }
}
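The class above only builds the configuration; below is a minimal usage sketch, not part of the original gist. It assumes the DL4J 0.4.x API used in the gist, a 224x224 RGB input (the standard AlexNet input size; the gist leaves the actual size to the caller), and a placeholder DataSetIterator named trainData. AbstractTrainer is not included in the gist, so buildConfig is called directly, which only compiles here because the sketch sits in the same trainer package as the protected method.

package trainer;

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;

public class AlexNetUsageSketch {
    // Builds the network from the configuration above and trains it for a few epochs.
    // "trainData" is a placeholder; wire up your own data pipeline here.
    public static void run(DataSetIterator trainData, int numOfClasses) {
        AlexNetTrainer trainer = new AlexNetTrainer();
        // Assumed 224x224 RGB input; same-package access to the protected buildConfig.
        MultiLayerConfiguration conf = trainer.buildConfig(224, 224, 3, numOfClasses);
        MultiLayerNetwork network = new MultiLayerNetwork(conf);
        network.init();
        network.setListeners(new ScoreIterationListener(1)); // print the score every iteration
        for (int epoch = 0; epoch < 5; epoch++) { // 5 epochs, matching init() above
            network.fit(trainData);
            trainData.reset();
        }
    }
}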