@amersa64
Last active January 31, 2019
int numFilters = 128;
int kernelSize = 5;
int maxVocabIndex = 10000;
int numClasses = 2;
int embeddingLength = 50;
int maxSequenceLength = 128; // matches maxLength in createVectors() below

// Set up the network configuration
ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder()
        .weightInit(WeightInit.RELU)
        .seed(seed)
        .updater(new Adam(0.01))
        .activation(Activation.RELU)
        .graphBuilder()
        .addInputs("input")
        .addLayer("embed", new EmbeddingLayer.Builder()
                .nIn(maxVocabIndex)
                .nOut(embeddingLength)
                .build(), "input")
        .addLayer("cnn1", new Convolution1DLayer.Builder()
                .nIn(embeddingLength)
                .nOut(numFilters)
                .kernelSize(kernelSize)
                .activation(Activation.RELU)
                .build(), "embed")
        .addLayer("globalPool1", new GlobalPoolingLayer.Builder()
                .poolingType(PoolingType.MAX)
                .build(), "cnn1")
        .addLayer("predict", new DenseLayer.Builder()
                .nIn(numFilters)
                .nOut(numClasses)
                .dropOut(0.9) // note: in DL4J, dropOut(0.9) is the *retain* probability
                .activation(Activation.SOFTMAX)
                .build(), "globalPool1")
        .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .build(), "predict")
        .setOutputs("loss")
        .setInputTypes(InputType.recurrent(1, maxSequenceLength))
        .build();
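
For context, a minimal usage sketch (not part of the original gist): building the ComputationGraph from this config and fitting it on a DataSet such as the one createVectors() below returns.

// Minimal usage sketch (not from the gist): build and train the graph.
// `trainData` is assumed to be a DataSet like the one createVectors() produces.
ComputationGraph net = new ComputationGraph(config);
net.init();
// summary(InputType...) prints the inferred activation-shape table shown in the comments below
System.out.println(net.summary(InputType.recurrent(1, maxSequenceLength)));
for (int epoch = 0; epoch < 5; epoch++) {
    net.fit(trainData);
}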
/**
 * @param trainingLabels map of docId to label
 * @param docTokensMap   map of docId to list of ordered token indices from documents
 * @return dataset object
 */
private DataSet createVectors(Map<String, Integer> trainingLabels, Map<String, List<Integer>> docTokensMap) {
    int rowIndex = 0;
    int maxLength = 128;
    INDArray featureMatrix = Nd4j.create(docTokensMap.size(), 1, maxLength);
    INDArray labelsMatrix = Nd4j.create(docTokensMap.size(), 2);
    for (String docId : docTokensMap.keySet()) {
        List<Integer> tokIndices = docTokensMap.get(docId);
        // get the sequence length so overly long documents are truncated to maxLength
        int sequenceLength = Math.min(tokIndices.size(), maxLength);
        if (sequenceLength == 0) {
            continue;
        }
        for (int i = 0; i < sequenceLength; i++) {
            Integer index = tokIndices.get(i);
            featureMatrix.putScalar(new int[]{rowIndex, 0, i}, index);
        }
        // this is only done if we are in training mode (`train` is an instance flag)
        if (train) {
            int idx = trainingLabels.get(docId) == 1 ? 0 : 1;
            labelsMatrix.putScalar(new int[]{rowIndex, idx}, 1.0); // Set label: [0,1] for negative, [1,0] for positive
        }
        rowIndex++;
    }
    // create the classifier problem data
    return new DataSet(featureMatrix, labelsMatrix, null, null);
}
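
A hypothetical call site for createVectors(); loadLabels() and loadTokenIds() are illustrative placeholders for whatever upstream tokenization produces these maps, not functions from the gist.

// Hypothetical call site (not in the gist): loadLabels() and loadTokenIds()
// stand in for the upstream tokenization step that builds these maps.
Map<String, Integer> labels = loadLabels();          // docId -> 0/1 class label
Map<String, List<Integer>> tokens = loadTokenIds();  // docId -> ordered vocab indices
DataSet trainData = createVectors(labels, tokens);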
private void initCuda() {
    // I tried either turning periodic GC off completely
    // or reducing its frequency:
    // Nd4j.getMemoryManager().setAutoGcWindow(100000);
    Nd4j.getMemoryManager().togglePeriodicGc(false);
    CudaEnvironment.getInstance().getConfiguration()
            .allowMultiGPU(false);
            //.setMaximumDeviceCache((long) (0.5 * 4 * 1024 * 1024 * 1024L))
}
amersa64 commented Jan 22, 2019

These are my JVM params:
-Dorg.bytedeco.javacpp.maxbytes=8G -Dorg.bytedeco.javacpp.maxphysicalbytes=10G -Xms1G -Xmx2G

amersa64 commented
This is for GPU:
----- Memory Configuration -----
JVM Memory: XMX 1.78 GB (1908932608)
JVM Memory: current 1.01 GB (1083703296)
JavaCPP Memory: Max Bytes 8 GB (8589934592)
JavaCPP Memory: Max Physical 10 GB (10737418240)
JavaCPP Memory: Current Bytes 4.55 GB (4884915841)
JavaCPP Memory: Current Physical 4.52 GB (4856979456)

This is for CPU:
----- Memory Configuration -----
JVM Memory: XMX 1.78 GB (1908932608)
JVM Memory: current 1.25 GB (1337458688)
JavaCPP Memory: Max Bytes 8 GB (8589934592)
JavaCPP Memory: Max Physical 10 GB (10737418240)
JavaCPP Memory: Current Bytes 814.36 MB (853923541)
JavaCPP Memory: Current Physical 2.14 GB (2302488576)

Can you explain this difference?

amersa64 commented
When I added the embedding layer, I see a weird -1 in the network shape when I print out the info for the CNN layer!
----- Network Activations: Inferred Activation Shapes -----
Current Minibatch Size 2322
Current Input Shape (Input 0) [2322, 1, 128]
Idx Name Layer Type Activations Type Activations Shape # Elements Memory
0 input InputVertex InputTypeRecurrent(1,timeSeriesLength=128) [2322, 1, 128] 297216 1.13 MB (1188864)
1 embed EmbeddingLayer InputTypeFeedForward(50) [2322, 50] 116100 453.52 KB (464400)
----------> 2 cnn1 Convolution1DLayer InputTypeRecurrent(128) [2322, 128, -1] -297216 -1.13 MB
3 globalPool1 GlobalPoolingLayer InputTypeFeedForward(128) [2322, 128] 297216 1.13 MB (1188864)
4 predict DenseLayer InputTypeFeedForward(2) [2322, 2] 4644 18.14 KB (18576)
5 loss LossLayer InputTypeFeedForward(0) [2322, 0] 0 0 B
Total Activations Memory 1.59 MB (1671840)
Total Activation Gradient Memory 1.59 MB (1671840)
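
The -1 likely appears because the plain EmbeddingLayer emits 2D feed-forward activations of shape [minibatch, 50] (see row 1 above), so the shape-inference pass has no sequence axis left for the 1D convolution. A hedged sketch of one possible fix, assuming a DL4J version that ships EmbeddingSequenceLayer (1.0.0-beta3 or later), which keeps the time axis:

// Hedged sketch (not from the gist): EmbeddingSequenceLayer maps
// [minibatch, 1, seqLen] token indices to [minibatch, embeddingLength, seqLen],
// so Convolution1DLayer can infer its time dimension instead of -1.
ComputationGraphConfiguration fixedConfig = new NeuralNetConfiguration.Builder()
        .seed(seed)
        .updater(new Adam(0.01))
        .graphBuilder()
        .addInputs("input")
        .addLayer("embed", new EmbeddingSequenceLayer.Builder()
                .nIn(maxVocabIndex)
                .nOut(embeddingLength)
                .inputLength(maxSequenceLength)
                .build(), "input")
        // ... "cnn1", "globalPool1", "predict", and "loss" unchanged
        // from the configuration at the top of the gist ...
        .setOutputs("loss")
        .setInputTypes(InputType.recurrent(1, maxSequenceLength))
        .build();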
