johnmiedema/OpenNlpModelNERBookTItles

## OpenNlpModelNERBookTItles
//Create an OpenNLP model for Named Entity Recognition of Book Titles
//See tester at https://gist.github.com/johnmiedema/7e7330e1b9263267bdfc

package demoModelTrainer;

import java.io.File;
import java.io.FileOutputStream;
import java.util.Collections;

import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.NameSampleDataStream;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;

/**
 * Trains an OpenNLP name finder model of type "title" from tagged sample
 * sentences, using the default feature generators, and saves it to disk.
 */
public class BuildModelDefaultFeatures {

	/**
	 * Reads tagged training sentences from {@code titles.txt}, trains a "title"
	 * name finder model and writes it to {@code en-title.bin}.
	 *
	 * Failures are reported on standard error. If training fails, the output
	 * file is not opened, so an existing model file is left untouched.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {

		//training data: titles marked up with <START> and <END> tags,
		//one sentence per line
		File inFile = new File("titles.txt");

		//destination of the trained model, used in testing and production
		File outFile = new File("en-title.bin");

		int iterations = 100;
		int cutoff = 5;

		//train the "title" model

		TokenNameFinderModel model = null;
		java.io.FileReader reader = null;
		try {
			//NOTE(review): FileReader decodes with the platform default charset;
			//confirm this matches the encoding of titles.txt
			reader = new java.io.FileReader(inFile);

			//converts the tagged lines into NameSample objects as the trainer reads them
			NameSampleDataStream nss = new NameSampleDataStream(
					new PlainTextByLineStream(reader));

			model = NameFinderME.train(
					"en", //language of the training data (relevant to tokenization)
					"title", //type of model
					nss, //the NameSample stream, created above
					(AdaptiveFeatureGenerator) null, //null=use default set of feature generators for NE detection
					Collections.<String,Object>emptyMap(), //empty, not adding additional resources to the model
					iterations, //number of training iterations
					cutoff); //lower bound for the number of times a feature exists before it is included in the model
		}
		catch (Exception ex) {
			//print the exception itself: getMessage() alone can be null
			System.err.println("Could not train model from " + inFile + ": " + ex);
		}
		finally {
			//release the training file whether or not training succeeded
			if (reader != null) {
				try {
					reader.close();
				}
				catch (java.io.IOException ex) {
					System.err.println("Could not close " + inFile + ": " + ex);
				}
			}
		}

		//without a model there is nothing to save; stop before the output
		//file is created or truncated
		if (model == null) {
			return;
		}

		//save the model to disk

		FileOutputStream outFileStream = null;
		try {
			outFileStream = new FileOutputStream(outFile);
			model.serialize(outFileStream);
		}
		catch (Exception ex) {
			System.err.println("Could not write model to " + outFile + ": " + ex);
		}
		finally {
			if (outFileStream != null) {
				try {
					outFileStream.close();
				}
				catch (java.io.IOException ex) {
					System.err.println("Could not close " + outFile + ": " + ex);
				}
			}
		}
	}
}
	//Create an OpenNLP model for Named Entity Recognition of Book Titles
	//See tester at https://gist.github.com/johnmiedema/7e7330e1b9263267bdfc

	package demoModelTrainer;

	import java.io.File;
	import java.io.FileOutputStream;
	import java.util.Collections;

	import opennlp.tools.namefind.NameFinderME;
	import opennlp.tools.namefind.NameSampleDataStream;
	import opennlp.tools.namefind.TokenNameFinderModel;
	import opennlp.tools.util.PlainTextByLineStream;
	import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;

	/**
	 * Trains an OpenNLP name finder model of type "title" from tagged sample
	 * sentences, using the default feature generators, and saves it to disk.
	 */
	public class BuildModelDefaultFeatures {

		/**
		 * Reads tagged training sentences from {@code titles.txt}, trains a "title"
		 * name finder model and writes it to {@code en-title.bin}.
		 *
		 * Failures are reported on standard error. If training fails, the output
		 * file is not opened, so an existing model file is left untouched.
		 *
		 * @param args command-line arguments; not used
		 */
		public static void main(String[] args) {

			//training data: titles marked up with <START> and <END> tags,
			//one sentence per line
			File inFile = new File("titles.txt");

			//destination of the trained model, used in testing and production
			File outFile = new File("en-title.bin");

			int iterations = 100;
			int cutoff = 5;

			//train the "title" model

			TokenNameFinderModel model = null;
			java.io.FileReader reader = null;
			try {
				//NOTE(review): FileReader decodes with the platform default charset;
				//confirm this matches the encoding of titles.txt
				reader = new java.io.FileReader(inFile);

				//converts the tagged lines into NameSample objects as the trainer reads them
				NameSampleDataStream nss = new NameSampleDataStream(
						new PlainTextByLineStream(reader));

				model = NameFinderME.train(
						"en", //language of the training data (relevant to tokenization)
						"title", //type of model
						nss, //the NameSample stream, created above
						(AdaptiveFeatureGenerator) null, //null=use default set of feature generators for NE detection
						Collections.<String,Object>emptyMap(), //empty, not adding additional resources to the model
						iterations, //number of training iterations
						cutoff); //lower bound for the number of times a feature exists before it is included in the model
			}
			catch (Exception ex) {
				//print the exception itself: getMessage() alone can be null
				System.err.println("Could not train model from " + inFile + ": " + ex);
			}
			finally {
				//release the training file whether or not training succeeded
				if (reader != null) {
					try {
						reader.close();
					}
					catch (java.io.IOException ex) {
						System.err.println("Could not close " + inFile + ": " + ex);
					}
				}
			}

			//without a model there is nothing to save; stop before the output
			//file is created or truncated
			if (model == null) {
				return;
			}

			//save the model to disk

			FileOutputStream outFileStream = null;
			try {
				outFileStream = new FileOutputStream(outFile);
				model.serialize(outFileStream);
			}
			catch (Exception ex) {
				System.err.println("Could not write model to " + outFile + ": " + ex);
			}
			finally {
				if (outFileStream != null) {
					try {
						outFileStream.close();
					}
					catch (java.io.IOException ex) {
						System.err.println("Could not close " + outFile + ": " + ex);
					}
				}
			}
		}
	}