-
-
Save amersa64/362962a1ddf219273e8e1b132cb27b8d to your computer and use it in GitHub Desktop.
int numfilters = 128; | |
int kernelSize = 5; | |
int maxVocabIndex = 10000; | |
int numClasses = 2; | |
int embeddingLength = 50; | |
//Set up network configuration | |
ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() | |
// workspace settings for memory management | |
.weightInit(WeightInit.RELU) | |
.seed(seed) | |
.updater(new Adam(0.01)) | |
.activation(Activation.RELU) | |
.graphBuilder() | |
.addInputs("input") | |
.addLayer("embed", new EmbeddingLayer.Builder() | |
.nIn(maxVocabIndex) | |
.nOut(embeddingLength) | |
.build(), "input") | |
.addLayer("cnn1", new Convolution1DLayer.Builder() | |
.nIn(embeddingLength) | |
.nOut(numfilters) | |
.kernelSize(kernelSize) | |
.activation(Activation.RELU) | |
.build(), "embed") | |
.addLayer("globalPool1", new GlobalPoolingLayer.Builder() | |
.poolingType(PoolingType.MAX) | |
.build(), "cnn1") | |
.addLayer("predict", new DenseLayer.Builder() | |
.nIn(numfilters) | |
.nOut(numClasses) | |
.dropOut(0.9) | |
.activation(Activation.SOFTMAX) | |
.build(), "globalPool1") | |
.addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.MCXENT) | |
.build(), "predict") | |
.setOutputs("loss") | |
.setInputTypes(InputType.recurrent(1, maxSequenceLength)) | |
.build(); |
/** | |
* @param docTokensMap map of docId to list of ordered token Indices from documents | |
* @param trainingLabels map of docId to label | |
* return dataset object | |
*/ | |
private DataSet createVectors(Map<String,Integer> trainingLabels, Map<String,List<Integer>> docTokensMap){ | |
int rowIndex = 0; | |
int maxLength = 128; | |
INDArray featureMatrix = Nd4j.create(docTokensMap.size(), 1, maxLength); | |
INDArray labelsMatrix = Nd4j.create(docTokensMap.size(), 2); | |
for (String docId : docId) { | |
List<Integer> tokIndices = docTokensMap.get(docId); | |
//get sequence length to use it for truncation later | |
int sequenceLength = Math.min(tokensIds.size(), maxLength); | |
if (sequenceLength == 0) { | |
continue; | |
} | |
for (int i = 0; i < tokIndices.size(); i++) { | |
Integer index = tokIndices.get(i); | |
featureMatrix.putScalar(new int[]{rowIndex, 0, i}, index); | |
} | |
//this is only done if we are in training mode | |
if (train) { | |
int idx = trainingLabels.get(docId) == 1 ? 0 : 1; | |
labelsMatrix.putScalar(new int[]{rowIndex, idx}, 1.0); //Set label: [0,1] for negative, [1,0] for positive | |
} | |
} | |
// create the classifier problem data | |
return new DataSet(featureMatrix, labelsMatrix, null, null); | |
} | |
private void initCuda(){ | |
// I tried to either completely turn off periodic GC | |
// or reduce the frequency | |
// Nd4j.getMemoryManager().setAutoGcWindow(100000); | |
Nd4j.getMemoryManager().togglePeriodicGc(false); | |
CudaEnvironment.getInstance().getConfiguration() | |
.allowMultiGPU(false); | |
//.setMaximumDeviceCache((long) (0.5 * 4 * 1024 * 1024 * 1024L)) | |
} |
When I added the embedding layer, I see a weird -1 in my network shape when I print out the info for the CNN layer:
----- Network Activations: Inferred Activation Shapes -----
Current Minibatch Size 2322
Current Input Shape (Input 0) [2322, 1, 128]
Idx Name Layer Type Activations Type Activations Shape # Elements Memory
0 input InputVertex InputTypeRecurrent(1,timeSeriesLength=128) [2322, 1, 128] 297216 1.13 MB (1188864)
1 embed EmbeddingLayer InputTypeFeedForward(50) [2322, 50] 116100 453.52 KB (464400)
----------> 2 cnn1 Convolution1DLayer InputTypeRecurrent(128) [2322, 128, -1] -297216 -1.13 MB
3 globalPool1 GlobalPoolingLayer InputTypeFeedForward(128) [2322, 128] 297216 1.13 MB (1188864)
4 predict DenseLayer InputTypeFeedForward(2) [2322, 2] 4644 18.14 KB (18576)
5 loss LossLayer InputTypeFeedForward(0) [2322, 0] 0 0 B
Total Activations Memory 1.59 MB (1671840)
Total Activation Gradient Memory 1.59 MB (1671840)
This is for GPU:
----- Memory Configuration -----
JVM Memory: XMX 1.78 GB (1908932608)
JVM Memory: current 1.01 GB (1083703296)
JavaCPP Memory: Max Bytes 8 GB (8589934592)
JavaCPP Memory: Max Physical 10 GB (10737418240)
JavaCPP Memory: Current Bytes 4.55 GB (4884915841)
JavaCPP Memory: Current Physical 4.52 GB (4856979456)
This is for CPU:
----- Memory Configuration -----
JVM Memory: XMX 1.78 GB (1908932608)
JVM Memory: current 1.25 GB (1337458688)
JavaCPP Memory: Max Bytes 8 GB (8589934592)
JavaCPP Memory: Max Physical 10 GB (10737418240)
JavaCPP Memory: Current Bytes 814.36 MB (853923541)
JavaCPP Memory: Current Physical 2.14 GB (2302488576)
Can you explain this?