Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save tomzhang/0aff3f3acea1f5c40775442709629562 to your computer and use it in GitHub Desktop.
Save tomzhang/0aff3f3acea1f5c40775442709629562 to your computer and use it in GitHub Desktop.
Stanford Core NLP Server 模式的多语言版本。
package edu.stanford.nlp.pipeline;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.tokensregex.SequenceMatchResult;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.util.*;
import java.math.BigInteger;
import java.util.*;
import java.util.concurrent.*;
import java.util.function.Consumer;
import java.util.function.Predicate;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import static edu.stanford.nlp.util.logging.Redwood.Util.*;
import static*;
* This class creates an HTTP server that answers annotation requests using a single StanfordCoreNLP pipeline, built once from the properties given at start-up.
* @author Gabor Angeli
* @author Arun Chaganty
// HTTP front end for one StanfordCoreNLP pipeline. The pipeline is constructed once, in the
// constructor, from the Properties passed in, and is handed to the request handler in run(...).
public class StanfordCoreNLPServerMultiLang implements Runnable {
// The underlying HTTP(S) server. Unassigned (null) until
// run(Predicate, Consumer, FileHandler, boolean) creates it.
protected HttpServer server;
// The fields below are annotated with @ArgumentParser.Option; main() populates them from the
// command line through ArgumentParser.fillOptions(server, args).
@ArgumentParser.Option(name="port", gloss="The port to run the server on")
protected int serverPort = 9000;
// Fallback wait (ms) for an annotation job when the "timeout" property cannot be parsed as an int.
@ArgumentParser.Option(name="timeout", gloss="The default timeout, in milliseconds")
protected int timeoutMilliseconds = 15000;
// Selects the default request-body encoding in getDocument: ISO-8859-1 when true, UTF-8 otherwise.
@ArgumentParser.Option(name="strict", gloss="If true, obey strict HTTP standards (e.g., with encoding)")
protected boolean strict = false;
@ArgumentParser.Option(name="quiet", gloss="If true, don't print to stdout")
protected boolean quiet = false;
// Read by main() to decide whether run(...) is asked for HTTPS.
@ArgumentParser.Option(name="ssl", gloss="If true, start the server with an [insecure!] SSL connection")
protected boolean ssl = false;
// Secret generated in the constructor and written to a "corenlp.shutdown" file; the
// ShutdownHandler compares the request's "key" parameter against it.
protected final String shutdownKey;
// The single pipeline created in the constructor and passed to CoreNLPHandler.
protected StanfordCoreNLP pipeline;
// Upper bound on the request text length (in chars) checked in CoreNLPHandler.handle.
// NOTE(review): public, static and non-final, so any code can change it at runtime.
public static int MAX_CHAR_LENGTH = 100000;
// Server-wide properties: used to build the pipeline and as the handler's default properties.
public Properties props;
* The thread pool for the HTTP server.
// Fixed-size pool with ArgumentParser.threads workers.
// NOTE(review): nothing in the visible code passes this executor to the HttpServer - confirm
// that it is actually installed (e.g. via setExecutor) before relying on it.
private final ExecutorService serverExecutor = Executors.newFixedThreadPool(ArgumentParser.threads);
* To prevent grossly wasteful over-creation of pipeline objects, cache the last
* few we created, until the garbage collector decides we can kill them.
// private final WeakHashMap<Properties, StanfordCoreNLP> pipelineCache = new WeakHashMap<>();
* An executor to time out CoreNLP execution with.
// Fixed-size pool with ArgumentParser.threads workers. CoreNLPHandler.handle submits each
// annotation job here and waits on the returned Future with a timeout.
private final ExecutorService corenlpExecutor = Executors.newFixedThreadPool(ArgumentParser.threads);
* Create a new Stanford CoreNLP Server.
* @param port The port to host the server from.
* @param timeout The timeout (in milliseconds) for each command.
* @param strict If true, conform more strictly to the HTTP spec (e.g., for character encoding).
* @throws IOException Thrown from the underlying socket implementation.
/*public StanfordCoreNLPServer(int port, int timeout, boolean strict) throws IOException {
this.serverPort = port;
this.timeoutMilliseconds = timeout;
this.strict = strict;
/**
 * Create a new Stanford CoreNLP Server around a pipeline built from the given properties.
 *
 * <p>As a side effect a fresh shutdown key is generated and written to the file
 * {@code corenlp.shutdown} inside the JVM's temporary directory ({@code java.io.tmpdir});
 * an existing key file is deleted first.
 *
 * @param props The properties used to construct the {@link StanfordCoreNLP} pipeline; they are
 *              also kept as the server-wide default properties.
 * @throws IOException Thrown if the shutdown key file cannot be written.
 * @throws IllegalStateException Thrown if a stale shutdown key file exists and cannot be deleted.
 */
public StanfordCoreNLPServerMultiLang(Properties props) throws IOException {
  this.props = props;

  // Generate and write a shutdown key.
  // System.getProperty rejects an empty key with IllegalArgumentException, so the temp
  // directory has to be looked up by its real property name.
  String tmpDir = System.getProperty("java.io.tmpdir");
  File tmpFile = new File(tmpDir + File.separator + "corenlp.shutdown");
  if (tmpFile.exists() && !tmpFile.delete()) {
    throw new IllegalStateException("Could not delete shutdown key file");
  }
  // The key authorises shutting the server down, so it must not be guessable:
  // draw it from a cryptographically strong source rather than java.util.Random.
  this.shutdownKey = new BigInteger(130, new java.security.SecureRandom()).toString(32);
  IOUtils.writeStringToFile(shutdownKey, tmpFile.getPath(), "utf-8");

  pipeline = new StanfordCoreNLP(props);
}
* Parse the URL parameters into a map of (key, value) pairs.
* @param uri The URL that was requested.
* @return A map of (key, value) pairs corresponding to the request parameters.
* @throws UnsupportedEncodingException Thrown if we could not decode the URL with utf8.
private static Map<String, String> getURLParams(URI uri) throws UnsupportedEncodingException {
if (uri.getQuery() != null) {
Map<String, String> urlParams = new HashMap<>();
String query = uri.getQuery();
String[] queryFields = query
.replaceAll("\\\\&", "___AMP___")
.replaceAll("\\\\+", "___PLUS___")
for (String queryField : queryFields) {
int firstEq = queryField.indexOf('=');
// Convention uses "+" for spaces.
String key = URLDecoder.decode(queryField.substring(0, firstEq), "utf8").replaceAll("___AMP___", "&").replaceAll("___PLUS___", "+");
String value = URLDecoder.decode(queryField.substring(firstEq + 1), "utf8").replaceAll("___AMP___", "&").replaceAll("___PLUS___", "+");
urlParams.put(key, value);
return urlParams;
} else {
return Collections.emptyMap();
* Reads the POST contents of the request and parses it into an Annotation object, ready to be annotated.
* This method can also read a serialized document, if the input format is set to be serialized.
* @param props The properties we are annotating with. This is where the input format is retrieved from.
* @param httpExchange The exchange we are reading POST data from.
* @return An Annotation representing the read document.
* @throws IOException Thrown if we cannot read the POST data.
* @throws ClassNotFoundException Thrown if we cannot load the serializer.
// Builds the Annotation to process from the request body. The "inputFormat" property
// (default "text") selects between reading the body as text and reading a serialized document;
// any other value ends in the IOException at the bottom.
private Annotation getDocument(Properties props, HttpExchange httpExchange) throws IOException, ClassNotFoundException {
String inputFormat = props.getProperty("inputFormat", "text");
String date = props.getProperty("date");
switch (inputFormat) {
case "text":
// The default encoding by the HTTP standard is ISO-8859-1, but most
// real users of CoreNLP would likely assume UTF-8 by default.
String defaultEncoding = this.strict ? "ISO-8859-1" : "UTF-8";
// Get the encoding
Headers h = httpExchange.getRequestHeaders();
String encoding;
if (h.containsKey("Content-type")) {
// Split the header on ';', split each piece on '=', and keep the first piece whose name is
// exactly "charset".
// NOTE(review): the comparison is exact, so a parameter written as " charset=..." (with the
// usual space after ';') or in another case does not match and the default is used - confirm
// whether that is intended.
String[] charsetPair = Arrays.asList(h.getFirst("Content-type").split(";")).stream()
.map(x -> x.split("="))
.filter(x -> x.length > 0 && "charset".equals(x[0]))
.findFirst().orElse(new String[]{"charset", defaultEncoding});
// Anything other than a clean name=value pair falls back to the default encoding.
if (charsetPair.length == 2) {
encoding = charsetPair[1];
} else {
encoding = defaultEncoding;
} else {
encoding = defaultEncoding;
// Read the whole body with the chosen encoding, then URL-decode and trim it.
// NOTE(review): the body is always URL-decoded here; presumably clients are expected to
// send URL-encoded text - verify against callers.
String text = IOUtils.slurpReader(IOUtils.encodedInputStreamReader(httpExchange.getRequestBody(), encoding));
text = URLDecoder.decode(text, encoding).trim();
// TODO(chaganty): URLdecode string.
// Read the annotation
Annotation annotation = new Annotation(text);
// Set the date (if provided)
if (date != null) {
annotation.set(CoreAnnotations.DocDateAnnotation.class, date);
return annotation;
case "serialized":
// The serializer class is named by "inputSerializer" (default: ProtobufAnnotationSerializer)
// and instantiated reflectively through MetaClass.
String inputSerializerName = props.getProperty("inputSerializer", ProtobufAnnotationSerializer.class.getName());
AnnotationSerializer serializer = MetaClass.create(inputSerializerName).createInstance();
// NOTE(review): the right-hand side of this assignment is missing in this copy of the file;
// presumably it reads the pair from the request body via the serializer - TODO restore.
Pair<Annotation, InputStream> pair =;
return pair.first;
throw new IOException("Could not parse input format: " + inputFormat);
* Create (or retrieve) a StanfordCoreNLP object corresponding to these properties.
* @param props The properties to create the object with.
* @return A pipeline parameterized by these properties.
/*private StanfordCoreNLP mkStanfordCoreNLP(Properties props) {
StanfordCoreNLP impl;
synchronized (pipelineCache) {
impl = pipelineCache.get(props);
if (impl == null) {
impl = new StanfordCoreNLP(props);
pipelineCache.put(props, impl);
return impl;
* A helper function to respond to a request with an error.
* @param response The description of the error to send to the user.
* @param httpExchange The exchange to send the error over.
* @throws IOException Thrown if the HttpExchange cannot communicate the error.
private static void respondError(String response, HttpExchange httpExchange) throws IOException {
httpExchange.getResponseHeaders().add("Content-type", "text/plain");
httpExchange.sendResponseHeaders(HTTP_INTERNAL_ERROR, response.length());
* A helper function to respond to a request with an error specifically indicating
* bad input from the user.
* @param response The description of the error to send to the user.
* @param httpExchange The exchange to send the error over.
* @throws IOException Thrown if the HttpExchange cannot communicate the error.
private static void respondBadInput(String response, HttpExchange httpExchange) throws IOException {
httpExchange.getResponseHeaders().add("Content-type", "text/plain");
httpExchange.sendResponseHeaders(HTTP_BAD_REQUEST, response.length());
* A helper function to respond to a request with an error stating that the user is not authorized
* to make this request.
* @param httpExchange The exchange to send the error over.
* @throws IOException Thrown if the HttpExchange cannot communicate the error.
private static void respondUnauthorized(HttpExchange httpExchange) throws IOException {
httpExchange.getResponseHeaders().add("Content-type", "application/javascript");
byte[] content = "{\"message\": \"Unauthorized API request\"}".getBytes("utf-8");
httpExchange.sendResponseHeaders(HTTP_UNAUTHORIZED, content.length);
* A callback object that lets us hook into the result of an annotation request.
public static class FinishedRequest {
public final Properties props;
public final Annotation document;
public final Optional<String> tokensregex;
public final Optional<String> semgrex;
public FinishedRequest(Properties props, Annotation document) {
this.props = props;
this.document = document;
this.tokensregex = Optional.empty();
this.semgrex = Optional.empty();
public FinishedRequest(Properties props, Annotation document, String tokensregex, String semgrex) {
this.props = props;
this.document = document;
this.tokensregex = Optional.ofNullable(tokensregex);
this.semgrex = Optional.ofNullable(semgrex);
* A simple ping test. Responds with pong.
protected static class PingHandler implements HttpHandler {
public void handle(HttpExchange httpExchange) throws IOException {
// Return a simple text message that says pong.
httpExchange.getResponseHeaders().set("Content-type", "text/plain");
String response = "pong\n";
httpExchange.sendResponseHeaders(HTTP_OK, response.getBytes().length);
* Sending the appropriate shutdown key will gracefully shutdown the server.
* This key is, by default, saved into the local file /tmp/corenlp.shutdown on the
* machine the server was run from.
// Handler for the shutdown endpoint: compares the "key" URL parameter with the server's
// shutdownKey and reports the outcome as plain text.
protected class ShutdownHandler implements HttpHandler {
public void handle(HttpExchange httpExchange) throws IOException {
Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
httpExchange.getResponseHeaders().set("Content-type", "text/plain");
// Assume failure until the key has been verified.
boolean doExit = false;
String response = "Invalid shutdown key\n";
// NOTE(review): String.equals is not a constant-time comparison; consider
// MessageDigest.isEqual on the key bytes if timing attacks are a concern.
if (urlParams.containsKey("key") && urlParams.get("key").equals(shutdownKey)) {
response = "Shutdown successful!\n";
doExit = true;
// Status 200 is sent for a valid and an invalid key alike; only the text differs.
// NOTE(review): getBytes() uses the platform default charset, and no statement writing the
// body or closing the exchange is visible in this copy - confirm against the original file.
httpExchange.sendResponseHeaders(HTTP_OK, response.getBytes().length);
// NOTE(review): the body of this branch is missing in this copy; presumably it terminates
// the server/JVM - TODO restore from the original.
if (doExit) {
* Serve a file from the filesystem or classpath
public static class FileHandler implements HttpHandler {
private final String content;
public FileHandler(String fileOrClasspath) throws IOException {
this.content = IOUtils.slurpReader(IOUtils.readerFromString(fileOrClasspath));
public void handle(HttpExchange httpExchange) throws IOException {
httpExchange.getResponseHeaders().set("Content-type", "text/html");
httpExchange.sendResponseHeaders(HTTP_OK, content.getBytes().length);
* The main handler for taking an annotation request, and annotating it.
// Inner (non-static) handler: it can therefore read the enclosing server's fields such as
// quiet, corenlpExecutor and timeoutMilliseconds.
protected class CoreNLPHandler implements HttpHandler {
* The default properties to use in the absence of anything sent by the client.
public final Properties defaultProps;
* An authenticator to determine if we can perform this API request.
private final Predicate<Properties> authenticator;
* A callback to call when an annotation job has finished.
private final Consumer<FinishedRequest> callback;
// The page handler passed in for browser requests.
// NOTE(review): no use of this field is visible in this copy of handle() - confirm.
private final FileHandler homepage;
// The pipeline this handler works with. It has the same name as, and so hides, the
// enclosing server's pipeline field. Assigned only in the constructor.
private StanfordCoreNLP pipeline;
* Create a handler for accepting annotation requests.
* @param props The properties file to use as the default if none were sent by the client.
// The constructor only stores its arguments; it performs no validation.
public CoreNLPHandler(Properties props,
StanfordCoreNLP pipeline,
Predicate<Properties> authenticator,
Consumer<FinishedRequest> callback,
FileHandler homepage) {
this.defaultProps = props;
this.pipeline = pipeline;
this.callback = callback;
this.authenticator = authenticator;
this.homepage = homepage;
* Get the response data type to send to the client, based off of the output format requested from
* CoreNLP.
* @param props The properties being used by CoreNLP.
* @param of The output format being output by CoreNLP.
* @return An identifier for the type of the HTTP response (e.g., 'text/json').
// Maps a CoreNLP output format to the Content-type sent to the client:
// JSON -> application/json, TEXT and CONLL -> text/plain, XML -> text/xml.
// NOTE(review): one or more case labels between the XML branch and the final return are
// missing in this copy; as written the serializer check below has no label of its own -
// TODO restore from the original before editing this switch.
public String getContentType(Properties props, StanfordCoreNLP.OutputFormat of) {
switch(of) {
case JSON:
return "application/json";
case TEXT:
case CONLL:
return "text/plain";
case XML:
return "text/xml";
// Protobuf gets its own type only when "outputSerializer" names ProtobufAnnotationSerializer.
String outputSerializerName = props.getProperty("outputSerializer");
if (outputSerializerName != null &&
outputSerializerName.equals(ProtobufAnnotationSerializer.class.getName())) {
return "application/x-protobuf";
//noinspection fallthrough
// Generic binary type for everything not matched above.
return "application/octet-stream";
// Handles one request: reads the document from the exchange, runs the annotation job on
// corenlpExecutor with a timeout, serialises the result in the requested output format and
// sends it back; failures are reported through respondError / respondBadInput.
// NOTE(review): several statements and all closing braces are missing in this copy of the
// method, so the exact nesting below cannot be relied on - compare with the original file.
public void handle(HttpExchange httpExchange) throws IOException {
// Set common response headers
httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");
// Get sentence.
// The per-request properties are disabled (next line and the getProperties call below are
// commented out), so every use of "props" in this method refers to the enclosing server's
// props field.
//Properties props;
Annotation ann;
StanfordCoreNLP.OutputFormat of;
try {
//props = getProperties(httpExchange);
if ("GET".equalsIgnoreCase(httpExchange.getRequestMethod())) {
// Handle direct browser connections (i.e., not a POST request).
// NOTE(review): the body of this branch is missing in this copy.
} else {
// Handle API request
// A null authenticator means "allow everything".
if (authenticator != null && !authenticator.test(props)) {
log("[" + httpExchange.getRemoteAddress() + "] API call w/annotators " + props.getProperty("annotators", "<unknown>"));
ann = getDocument(props, httpExchange);
// Output format defaults to "json"; an unknown name makes valueOf throw, which is caught
// by the generic catch below.
of = StanfordCoreNLP.OutputFormat.valueOf(props.getProperty("outputFormat", "json").toUpperCase());
// Newlines are replaced by spaces in this local copy of the text only.
String text = ann.get(CoreAnnotations.TextAnnotation.class).replace('\n', ' ');
if (!quiet) {
// Reject documents longer than MAX_CHAR_LENGTH characters.
if (text.length() > MAX_CHAR_LENGTH) {
respondBadInput("Request is too long to be handled by server: " + text.length() + " characters. Max length is " + MAX_CHAR_LENGTH + " characters.", httpExchange);
} catch (Exception e) {
// The cause is not included in the response sent to the client.
respondError("Could not handle incoming annotation", httpExchange);
Future<Annotation> completedAnnotationFuture = null;
try {
// Annotate
// NOTE(review): only "return ann" is visible inside the submitted task; the statement that
// runs the pipeline on ann is presumably missing from this copy.
completedAnnotationFuture = corenlpExecutor.submit(() -> {
return ann;
Annotation completedAnnotation;
try {
// NOTE(review): the default-value argument of getProperty is cut off in this copy.
int timeoutMilliseconds = Integer.parseInt(props.getProperty("timeout",
// Check for too long a timeout from an unauthorized source
if (timeoutMilliseconds > 15000) {
// The timeout is meant to be capped at 15 seconds when both hold:
// (1) the local host name equals the literal in the first condition, and
// (2) the remote host name does not end with the literal in the second condition.
// NOTE(review): both literals are empty strings in this copy. endsWith("") is always true,
// so condition (2) is always false and the cap can never apply as written - restore the
// original host names.
if ("".equals(InetAddress.getLocalHost().getHostName()) &&
!httpExchange.getRemoteAddress().getHostName().toLowerCase().endsWith("")) {
timeoutMilliseconds = 15000;
completedAnnotation = completedAnnotationFuture.get(timeoutMilliseconds, TimeUnit.MILLISECONDS);
} catch (NumberFormatException e) {
// The "timeout" property was not a number: wait for the server-wide default instead.
completedAnnotation = completedAnnotationFuture.get(StanfordCoreNLPServerMultiLang.this.timeoutMilliseconds, TimeUnit.MILLISECONDS);
completedAnnotationFuture = null; // No longer any need for the future
// Get output
// Serialise into memory first so the exact response length is known before the headers go out.
ByteArrayOutputStream os = new ByteArrayOutputStream();
AnnotationOutputter.Options options = AnnotationOutputter.getOptions(pipeline);
StanfordCoreNLP.createOutputter(props, options).accept(completedAnnotation, os);
byte[] response = os.toByteArray();
String contentType = getContentType(props, of);
// Textual formats additionally advertise the encoding the outputter used.
if (contentType.equals("application/json") || contentType.startsWith("text/")) {
contentType += ";charset=" + options.encoding;
httpExchange.getResponseHeaders().add("Content-type", contentType);
httpExchange.getResponseHeaders().add("Content-length", Integer.toString(response.length));
httpExchange.sendResponseHeaders(HTTP_OK, response.length);
// NOTE(review): no statement writing "response" to the exchange is visible in this copy.
// The callback is only notified when an "annotators" property is set and non-empty.
if (completedAnnotation != null && props.getProperty("annotators") != null && !"".equals(props.getProperty("annotators"))) {
callback.accept(new FinishedRequest(props, completedAnnotation));
} catch (TimeoutException e) {
// Print the stack trace for debugging
// Return error message.
respondError("CoreNLP request timed out. Your document may be too long.", httpExchange);
// Cancel the future if it's alive
// NOTE(review): the cancel statement itself is missing in this copy.
//noinspection ConstantConditions
if (completedAnnotationFuture != null) {
} catch (Exception e) {
// Print the stack trace for debugging
// Return error message.
// The exception class name and message are sent to the client verbatim.
respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
// Cancel the future if it's alive
// NOTE(review): the cancel statement itself is missing in this copy.
//noinspection ConstantConditions
if (completedAnnotationFuture != null) { // just in case...
* Parse the parameters of a connection into a CoreNLP properties file that can be passed into
* {@link StanfordCoreNLP}, and used in the I/O stages.
* @param httpExchange The http exchange; effectively, the request information.
* @return A {@link Properties} object corresponding to a combination of default and passed properties.
* @throws UnsupportedEncodingException Thrown if we could not decode the key/value pairs with UTF-8.
// Intended to build the per-request Properties from the defaults plus the URL parameters.
// In this variant most of the logic is commented out, and the only call to this method that is
// visible in the file (in CoreNLPHandler.handle) is commented out as well.
// NOTE(review): the three stream fragments below have lost their receivers (the lines that
// started each chain are missing in this copy), so the method does not compile as shown -
// TODO restore from the original or delete the method.
private Properties getProperties(HttpExchange httpExchange) throws UnsupportedEncodingException {
//Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
// Load the default properties
Properties props = new Properties();
.forEach(entry -> props.setProperty(entry.getKey().toString(), entry.getValue().toString()));
// Add GET parameters as properties
// The visible part of the filter skips the parameter named "properties" (case-insensitive);
// the rest of the condition is cut off.
.filter(entry ->
!"properties".equalsIgnoreCase(entry.getKey()) &&
.forEach(entry -> props.setProperty(entry.getKey(), entry.getValue()));
// Try to get more properties from query string.
// (get the properties from the URL params)
// Everything from here to the closing comment marker is disabled code.
/*Map<String, String> urlProperties = new HashMap<>();
if (urlParams.containsKey("properties")) {
urlProperties = StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8"));
} else if (urlParams.containsKey("props")) {
urlProperties = StringUtils.decodeMap(URLDecoder.decode(urlParams.get("props"), "UTF-8"));
// (tweak the default properties a bit)
if (!props.containsKey("")) {
// Set coref head to use dependencies
props.setProperty("", "dep");
if (urlProperties.containsKey("annotators") && urlProperties.get("annotators") != null &&
ArrayUtils.contains(urlProperties.get("annotators").split(","), "parse")) {
// (case: the properties have a parse annotator --
// we don't have to use the dependency mention finder)
// (add new properties on top of the default properties)
.forEach(entry -> props.setProperty(entry.getKey(), entry.getValue()));
// Get the annotators
String annotators = props.getProperty("annotators");
if (PropertiesUtils.getBool(props, "enforceRequirements", true)) {
annotators = StanfordCoreNLP.ensurePrerequisiteAnnotators(props.getProperty("annotators").split("[, \t]+"));
// Make sure the properties compile
props.setProperty("annotators", annotators);*/
return props;
private static void sendAndGetResponse(HttpExchange httpExchange, byte[] response) throws IOException {
if (response.length > 0) {
httpExchange.getResponseHeaders().add("Content-type", "application/json");
httpExchange.getResponseHeaders().add("Content-length", Integer.toString(response.length));
httpExchange.sendResponseHeaders(HTTP_OK, response.length);
// Configures the given HttpsServer with a TLS context backed by the keystore bundled at
// "edu/stanford/nlp/pipeline/corenlp.jks" and returns the same server instance.
// Any keystore / TLS set-up failure is rethrown as an unchecked RuntimeException.
private HttpsServer addSSLContext(HttpsServer server) {
log("Adding SSL context to server");
try {
// NOTE(review): the keystore location and its password ("corenlp") are hard-coded here,
// consistent with the "[insecure!]" wording on the ssl option.
KeyStore ks = KeyStore.getInstance("JKS");
ks.load(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem("edu/stanford/nlp/pipeline/corenlp.jks"), "corenlp".toCharArray());
KeyManagerFactory kmf = KeyManagerFactory.getInstance("SunX509");
kmf.init(ks, "corenlp".toCharArray());
SSLContext sslContext = SSLContext.getInstance("TLS");
// Only key managers are supplied; trust managers and the random source are passed as null.
sslContext.init(kmf.getKeyManagers(), null, null);
// Add SSL support to the server
server.setHttpsConfigurator(new HttpsConfigurator(sslContext) {
// NOTE(review): the remainder of configure(...) is missing in this copy; as shown, the
// engine is created but nothing is applied to params - TODO restore from the original.
public void configure(HttpsParameters params) {
SSLContext context = getSSLContext();
SSLEngine engine = context.createSSLEngine();
// Return
return server;
} catch (CertificateException | IOException | KeyStoreException | NoSuchAlgorithmException | KeyManagementException | UnrecoverableKeyException e) {
throw new RuntimeException(e);
* Returns the implementing Http server.
public Optional<HttpServer> getServer() {
return Optional.ofNullable(server);
/** @see StanfordCoreNLPServer#run(Predicate, Consumer, StanfordCoreNLPServer.FileHandler, boolean) */
public void run() {
// Set the static page handler
try {
FileHandler homepage = new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.html");
run(req -> true, obj -> {}, homepage, false);
} catch (IOException e) {
throw new RuntimeIOException(e);
* Run the server.
* This method registers the handlers, and initializes the HTTP server.
// Creates the HTTP or HTTPS server on serverPort and registers the handlers:
// "/" -> CoreNLPHandler, the two brat demo assets -> FileHandler, "/ping" -> PingHandler,
// "/shutdown" -> ShutdownHandler.
// NOTE(review): no statement starting the server (or installing an executor) is visible in
// this copy, and the body of the catch block is missing - compare with the original file.
public void run(Predicate<Properties> authenticator, Consumer<FinishedRequest> callback, FileHandler homepage, boolean https) {
try {
if (https) {
server = addSSLContext(HttpsServer.create(new InetSocketAddress(serverPort), 0)); // 0 is the default 'backlog'
} else {
server = HttpServer.create(new InetSocketAddress(serverPort), 0); // 0 is the default 'backlog'
// The homepage handler is handed to CoreNLPHandler rather than registered as its own context.
server.createContext("/", new CoreNLPHandler(props, pipeline, authenticator, callback, homepage));
server.createContext("/corenlp-brat.js", new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.js"));
// NOTE(review): the context path ends in ".cs" while the file served is ".css". This
// presumably still serves "/corenlp-brat.css" because HttpServer matches contexts by path
// prefix, but it looks like a typo - confirm and correct.
server.createContext("/corenlp-brat.cs", new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.css"));
server.createContext("/ping", new PingHandler());
server.createContext("/shutdown", new ShutdownHandler());
log("StanfordCoreNLPServer listening at " + server.getAddress());
} catch (IOException e) {
* The main method.
* Read the command line arguments and run the server.
* @param args The command line arguments
* @throws IOException Thrown if we could not start / run the server.
// Entry point: loads the demo homepage, turns the command-line arguments into Properties,
// builds the server (which also builds the pipeline and writes the shutdown key), fills the
// @ArgumentParser.Option fields from the same arguments, and then starts the server with or
// without SSL depending on the "ssl" option.
public static void main(String[] args) throws IOException {
FileHandler homepage;
try {
homepage = new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.html");
} catch (IOException e) {
throw new RuntimeIOException(e);
// The initial null is overwritten two lines below before it is ever read.
Properties props = null;
// this was taken directly from edu.stanford.nlp.pipeline.StanfordCoreNLP.main
props = StringUtils.argsToProperties(args);
// if (!props.containsKey("props"))
// {
// error("props must be provided");
// return;
// }
// Run the server
log("Starting server...");
StanfordCoreNLPServerMultiLang server = new StanfordCoreNLPServerMultiLang(props);
// Options are filled after construction, so the constructor itself never sees them.
ArgumentParser.fillOptions(server, args);
// NOTE(review): the call target in both branches is missing in this copy; judging by the
// argument lists these were presumably calls to server.run(...) with https=true / false -
// TODO restore from the original.
if (server.ssl) { -> true, res -> {}, homepage, true);
} else { -> true, res -> {}, homepage, false);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment