Skip to content

Instantly share code, notes, and snippets.

@erikfrey
Created September 6, 2012 23:24
Show Gist options
  • Save erikfrey/3661269 to your computer and use it in GitHub Desktop.
Save erikfrey/3661269 to your computer and use it in GitHub Desktop.
package org.maltparser.core.config;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.flow.FlowChartInstance;
import org.maltparser.core.flow.item.ChartItem;
import org.maltparser.core.flow.spec.ChartItemSpecification;
import org.maltparser.core.helper.SystemInfo;
import org.maltparser.core.helper.SystemLogger;
import org.maltparser.core.options.OptionManager;
/**
*
*
* @author Johan Hall
*/
public class ConfigDirChartItem extends ChartItem {
private String idName;
private String taskName;
private String optionFileName;
private URL configDirURL;
private String configDirName;
private ConfigurationDir configDir;
private String outCharSet;
private String inCharSet;
public ConfigDirChartItem() {}
public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
super.initialize(flowChartinstance, chartItemSpecification);
for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
if (key.equals("id")) {
idName = chartItemSpecification.getChartItemAttributes().get(key);
} else if (key.equals("task")) {
taskName = chartItemSpecification.getChartItemAttributes().get(key);
}
}
if (idName == null) {
idName = getChartElement("configdir").getAttributes().get("id").getDefaultValue();
} else if (taskName == null) {
taskName = getChartElement("configdir").getAttributes().get("task").getDefaultValue();
}
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "url") != null && OptionManager.instance().getOptionValue(getOptionContainerIndex(),"config", "url").toString().length() > 0) {
try {
configDirURL = new URL(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "url").toString());
} catch (MalformedURLException e) {
throw new ConfigurationException("The URL '"+OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "url").toString()+"' is malformed. ", e);
}
}
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "name").toString().endsWith(".mco")) {
int index = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "name").toString().lastIndexOf('.');
configDirName = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "name").toString().substring(0,index);
} else {
configDirName = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "name").toString();
}
try {
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "system", "option_file") != null) {
optionFileName = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "system", "option_file").toString();
}
} catch (ConfigurationException e) {
throw new ConfigurationException("The option file '"+optionFileName+"' could not be copied. ",e);
}
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "output", "charset") != null) {
outCharSet = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "output", "charset").toString();
} else {
outCharSet = "UTF-8";
}
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "input", "charset") != null) {
inCharSet = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "input", "charset").toString();
} else {
inCharSet = "UTF-8";
}
configDir = (ConfigurationDir)flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName);
if (configDir == null) {
if (configDirURL != null) {
configDir = new ConfigurationDir(configDirURL);
} else {
configDir = new ConfigurationDir(configDirName, idName, getOptionContainerIndex());
}
flowChartinstance.addFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName, configDir);
}
if (taskName.equals("versioning")) {
configDir.versioning();
} else if (taskName.equals("loadsavedoptions")) {
configDir.initCreatedByMaltParserVersionFromInfoFile();
if (configDir.getCreatedByMaltParserVersion() == null) {
SystemLogger.logger().warn("Couln't determine which version of MaltParser that created the parser model: " + configDirName+ ".mco\n MaltParser will terminate\n");
System.exit(1);
} else if (!configDir.getCreatedByMaltParserVersion().startsWith(SystemInfo.getVersion())) {
SystemLogger.logger().error("The parser model '"+ configDirName+ ".mco' is created by MaltParser "+configDir.getCreatedByMaltParserVersion()+".\n");
SystemLogger.logger().error("You have re-train the parser model to be able to parse with current version of MaltParser.");
System.exit(1);
}
OptionManager.instance().loadOptions(getOptionContainerIndex(), configDir.getInputStreamReaderFromConfigFile("savedoptions.sop"));
configDir.initDataFormat();
} else if (taskName.equals("createdir")) {
configDir.setCreatedByMaltParserVersion(SystemInfo.getVersion());
configDir.createConfigDirectory();
if (optionFileName != null && optionFileName.length() > 0) {
configDir.copyToConfig(new File(optionFileName));
}
configDir.initDataFormat();
}
}
public int preprocess(int signal) throws MaltChainedException {
if (taskName.equals("unpack")) {
SystemLogger.logger().info("Unpacking the parser model '"+ configDirName+ ".mco' ...\n");
configDir.unpackConfigFile();
} else if (taskName.equals("info")) {
configDir.echoInfoFile();
} else if (taskName.equals("loadsymboltables")) {
configDir.getSymbolTables().load(configDir.getInputStreamReaderFromConfigFileEntry("symboltables.sym",inCharSet));
}
return signal;
}
public int process(int signal) throws MaltChainedException {
return signal;
}
public int postprocess(int signal) throws MaltChainedException {
if (taskName.equals("createfile")) {
OptionManager.instance().saveOptions(getOptionContainerIndex(), configDir.getOutputStreamWriter("savedoptions.sop"));
configDir.createConfigFile();
} else if (taskName.equals("deletedir")) {
configDir.terminate();
configDir.deleteConfigDirectory();
} else if (taskName.equals("savesymboltables")) {
configDir.getSymbolTables().save(configDir.getOutputStreamWriter("symboltables.sym", outCharSet));
}
return signal;
}
public void terminate() throws MaltChainedException { }
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
return obj.toString().equals(this.toString());
}
public int hashCode() {
return 217 + (null == toString() ? 0 : toString().hashCode());
}
public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append(" configdir ");
sb.append("id:");sb.append(idName);
sb.append(' ');
sb.append("task:");sb.append(taskName);
return sb.toString();
}
}
package org.maltparser.core.helper;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.jar.Attributes;
import java.util.jar.JarFile;
import java.util.jar.Manifest;
import java.util.regex.Pattern;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.options.OptionManager;
/**
 * Holds static information about the running MaltParser: version, build date,
 * the main attributes of the jar manifest and the location of the malt jar on
 * the class path. The information is collected once, when the single instance
 * is created during class initialization.
 *
 * @author Johan Hall
 */
public class SystemInfo {
    // NOTE: uniqueInstance is created first and its constructor assigns the static
    // fields below. They must stay without initializers; an initializer would run
    // after the constructor and overwrite the collected value.
    private static SystemInfo uniqueInstance = new SystemInfo();
    private static String version;
    private static String buildDate;
    private static Attributes manifestAttributes;
    private static File maltJarPath;

    /**
     * Collects the manifest information and looks for a malt jar on the class
     * path. Logs the error and terminates the JVM if a
     * {@link MaltChainedException} is raised.
     */
    private SystemInfo() {
        // The separator is backslash-escaped inside the character class. Splicing
        // it in raw breaks the expression when the separator is '\', because the
        // backslash would then escape the closing ']'. A backslash before any
        // non-alphabetic character is always a plain literal escape.
        final Pattern maltJar = Pattern.compile("^.*malt[^\\" + File.separatorChar + "]*\\.jar$");
        try {
            getManifestInfo();
            final String[] classPathEntries = System.getProperty("java.class.path").split(File.pathSeparator);
            for (String entry : classPathEntries) {
                if (maltJar.matcher(entry).matches()) {
                    // If several entries match, the last one wins.
                    maltJarPath = new File(new File(entry).getAbsolutePath());
                }
            }
        } catch (MaltChainedException e) {
            if (SystemLogger.logger().isDebugEnabled()) {
                SystemLogger.logger().debug("", e);
            } else {
                SystemLogger.logger().error(e.getMessageChain());
            }
            System.exit(1);
        }
    }

    /**
     * Returns a reference to the single instance.
     *
     * @return the single instance
     */
    public static SystemInfo instance() {
        return uniqueInstance;
    }

    /**
     * Returns the application header
     *
     * @return the application header
     */
    public static String header() {
        StringBuilder sb = new StringBuilder();
        sb.append("-----------------------------------------------------------------------------\n");
        sb.append(" MaltParser ").append(version).append(" \n");
        sb.append("-----------------------------------------------------------------------------\n");
        sb.append(" MALT (Models and Algorithms for Language Technology) Group \n");
        sb.append(" Vaxjo University and Uppsala University \n");
        sb.append(" Sweden \n");
        sb.append("-----------------------------------------------------------------------------\n");
        return sb.toString();
    }

    /**
     * Returns a short version of the help
     *
     * @return a short version of the help
     */
    public static String shortHelp() {
        StringBuilder sb = new StringBuilder();
        sb.append("\n");
        sb.append("Usage: \n");
        sb.append(" java -jar malt.jar -f <path to option file> <options>\n");
        sb.append(" java -jar malt.jar -h for more help and options\n\n");
        sb.append(OptionManager.instance().getOptionDescriptions().toStringOptionGroup("system"));
        sb.append("Documentation: docs/index.html\n");
        return sb.toString();
    }

    /**
     * Returns a set of attributes present in the jar manifest file
     *
     * @return the main attributes of the jar manifest, or <b>null</b> if the
     *         class was not loaded from a jar file or the jar has no manifest
     */
    public static Attributes getManifestAttributes() {
        return manifestAttributes;
    }

    /**
     * Returns the version number as string
     *
     * @return the version number as string
     */
    public static String getVersion() {
        return version;
    }

    /**
     * Returns the build date
     *
     * @return the build date
     */
    public static String getBuildDate() {
        return buildDate;
    }

    /**
     * Returns the absolute path of the malt jar found on the class path.
     *
     * @return the path of the malt jar, or <b>null</b> if no class path entry matched
     */
    public static File getMaltJarPath() {
        return maltJarPath;
    }

    /**
     * Loads the manifest attributes from the manifest in the jar-file.
     *
     * <p>Hard-coded fallback values are used when the class is not loaded from a
     * jar file, when the jar has no manifest, or when the manifest lacks the
     * corresponding attribute. If the jar cannot be read, the version becomes the
     * empty string and the build date "Not available".
     *
     * @throws MaltChainedException declared for the caller's error handling
     */
    private void getManifestInfo() throws MaltChainedException {
        version = "1.5";
        buildDate = "meow";
        JarFile jarfile = null;
        try {
            // The code source is null for classes without one (e.g. loaded by the
            // bootstrap class loader), so it has to be checked before use.
            final java.security.CodeSource codeSource = SystemInfo.class.getProtectionDomain().getCodeSource();
            final URL codeBase = (codeSource == null) ? null : codeSource.getLocation();
            if (codeBase != null && codeBase.getPath().endsWith(".jar")) {
                jarfile = new JarFile(URLDecoder.decode(codeBase.getPath(),
                        java.nio.charset.Charset.defaultCharset().name()));
                final Manifest manifest = jarfile.getManifest();
                if (manifest != null) {
                    // Assign the static field; a local variable of the same name
                    // would leave getManifestAttributes() returning null forever.
                    manifestAttributes = manifest.getMainAttributes();
                    final String manifestVersion = manifestAttributes.getValue("Implementation-Version");
                    if (manifestVersion != null) {
                        version = manifestVersion;
                    }
                    final String manifestBuildDate = manifestAttributes.getValue("Build-Date");
                    if (manifestBuildDate != null) {
                        buildDate = manifestBuildDate;
                    }
                }
            }
        } catch (IOException e) {
            version = "";
            buildDate = "Not available";
            e.printStackTrace();
        } finally {
            if (jarfile != null) {
                try {
                    jarfile.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing the jar fails.
                }
            }
        }
    }
}
package org.maltparser.core.helper;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.log4j.Logger;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.plugin.Plugin;
import org.maltparser.core.plugin.PluginLoader;
/**
 * Static helper methods: XML escaping, resource look-up, progress logging,
 * file copying, lenient number parsing and small array utilities.
 *
 * @author Johan Hall
 */
public class Util {
    private static final int BUFFER = 4096;
    private static final char AMP_CHAR = '&';
    private static final char LT_CHAR = '<';
    private static final char GT_CHAR = '>';
    private static final char QUOT_CHAR = '"';
    private static final char APOS_CHAR = '\'';

    /**
     * Replaces the five XML special characters (&amp;, &lt;, &gt;, &quot; and
     * &apos;) with their predefined entity references.
     *
     * @param str the string to escape, may be <b>null</b>
     * @return the escaped string; the same instance if nothing needed escaping,
     *         or <b>null</b> if <code>str</code> is <b>null</b>
     */
    public static String xmlEscape(String str) {
        if (str == null) {
            return null;
        }
        // The builder is created lazily, so strings without special characters
        // are returned as-is without any copying.
        StringBuilder sb = null;
        for (int i = 0; i < str.length(); i++) {
            final char c = str.charAt(i);
            final String entity;
            switch (c) {
                case AMP_CHAR:  entity = "&amp;";  break;
                case LT_CHAR:   entity = "&lt;";   break;
                case GT_CHAR:   entity = "&gt;";   break;
                case QUOT_CHAR: entity = "&quot;"; break;
                case APOS_CHAR: entity = "&apos;"; break;
                default:        entity = null;     break;
            }
            if (entity != null) {
                if (sb == null) {
                    sb = new StringBuilder(str.length() + 16);
                    sb.append(str, 0, i);
                }
                sb.append(entity);
            } else if (sb != null) {
                sb.append(c);
            }
        }
        return (sb == null) ? str : sb.toString();
    }

    /**
     * Search for a file according the following priority:
     * <ol>
     * <li>The local file system
     * <li>Specified as an URL (starting with http:, file:, ftp: or jar:)
     * <li>MaltParser distribution file (malt.jar)
     * <li>MaltParser plugins
     * </ol>
     *
     * If the file string is found, an URL object is returned, otherwise <b>null</b>
     *
     * @param fileString the file string to convert into an URL.
     * @return an URL object, if the file string is found, otherwise <b>null</b>
     * @throws MaltChainedException if the file string results in a malformed URL
     */
    public static URL findURL(String fileString) throws MaltChainedException {
        File specFile = new File(fileString);
        try {
            if (specFile.exists()) {
                // found the file in the file system
                return new URL("file:///"+specFile.getAbsolutePath());
            } else if (fileString.startsWith("http:") || fileString.startsWith("file:") || fileString.startsWith("ftp:") || fileString.startsWith("jar:")) {
                // the input string is an URL string starting with http, file, ftp or jar
                return new URL(fileString);
            } else {
                return findURLinJars(fileString);
            }
        } catch (MalformedURLException e) {
            throw new MaltChainedException("Malformed URL: "+fileString, e);
        }
    }

    /**
     * Searches for a resource first among the resources visible to this class
     * and then inside each loaded plugin jar.
     *
     * @param fileString the resource name to look for
     * @return an URL to the resource, or <b>null</b> if it is not found
     * @throws MaltChainedException if a plugin location results in a malformed URL
     */
    public static URL findURLinJars(String fileString) throws MaltChainedException {
        try {
            // search in malt.jar and its plugins
            final URL resource = Util.class.getResource(fileString);
            if (resource != null) {
                // found the input string in the malt.jar file
                return resource;
            }
            for (Plugin plugin : PluginLoader.instance()) {
                final URL url;
                if (!fileString.startsWith("/")) {
                    url = new URL("jar:"+plugin.getUrl() + "!/" + fileString);
                } else {
                    url = new URL("jar:"+plugin.getUrl() + "!" + fileString);
                }
                try {
                    // The entry exists only if a stream to it can be opened.
                    InputStream is = url.openStream();
                    is.close();
                } catch (IOException e) {
                    continue;
                }
                // found the input string in one of the plugins
                return url;
            }
            // could not convert the input string into an URL
            return null;
        } catch (MalformedURLException e) {
            throw new MaltChainedException("Malformed URL: "+fileString, e);
        }
    }

    /**
     * Logs one progress tick and, once a row is full, the row summary.
     *
     * @param logger the logger to write to
     * @param startTime the start time in milliseconds, used for the elapsed time
     * @param nTicxRow the number of ticks per row
     * @param inTic the number of ticks already written on the current row
     * @param subject the counter value to print in the row summary
     * @return the number of ticks on the current row after this tick
     */
    public static int simpleTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
        logger.info(".");
        int tic = inTic + 1;
        if (tic >= nTicxRow) {
            ticInfo(logger, startTime, subject);
            tic = 0;
        }
        return tic;
    }

    /**
     * Logs the first tick, pads out the row and logs the row summary.
     *
     * @param logger the logger to write to
     * @param startTime the start time in milliseconds, used for the elapsed time
     * @param nTicxRow the number of ticks per row
     * @param subject the counter value to print in the row summary
     */
    public static void startTicer(Logger logger, long startTime, int nTicxRow, int subject) {
        logger.info(".");
        for (int i = 1; i <= nTicxRow; i++) {
            logger.info(" ");
        }
        ticInfo(logger, startTime, subject);
    }

    /**
     * Pads out the current, unfinished row and logs the row summary.
     *
     * @param logger the logger to write to
     * @param startTime the start time in milliseconds, used for the elapsed time
     * @param nTicxRow the number of ticks per row
     * @param inTic the number of ticks already written on the current row
     * @param subject the counter value to print in the row summary
     */
    public static void endTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
        for (int i = inTic; i <= nTicxRow; i++) {
            logger.info(" ");
        }
        ticInfo(logger, startTime, subject);
    }

    /**
     * Logs the row summary: the counter value, the elapsed time in seconds and
     * the used memory in MB, separated by tabs.
     */
    private static void ticInfo(Logger logger, long startTime, int subject) {
        logger.info("\t");
        if (subject != 0) {
            logPadding(logger, subject);
        } else {
            logger.info(" ");
        }
        logger.info(subject);

        logger.info("\t");
        final long time = (System.currentTimeMillis()-startTime)/1000;
        if (time != 0) {
            logPadding(logger, time);
            logger.info(time);
            logger.info("s");
        } else {
            logger.info(" 0s");
        }

        logger.info("\t");
        final long memory = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1000000;
        if (memory != 0) {
            logPadding(logger, memory);
            logger.info(memory);
            logger.info("MB\n");
        } else {
            logger.info(" 0MB\n");
        }
    }

    /**
     * Logs one blank for every power of ten, from 1000000 downwards, that is
     * larger than the value. Must only be called with a non-zero value; the
     * loop ends at the latest when the divisor has reached 1.
     */
    private static void logPadding(Logger logger, long value) {
        int a = 1000000;
        while (value/a == 0) {
            logger.info(" ");
            a /= 10;
        }
    }

    /**
     * Copies a file. Both streams are closed even when the copy fails.
     *
     * @param source the path of the file to read
     * @param destination the path of the file to write
     * @throws MaltChainedException if the source cannot be opened, the
     *         destination cannot be created or the copy fails
     */
    public static void copyfile(String source, String destination) throws MaltChainedException {
        BufferedInputStream bis = null;
        BufferedOutputStream bos = null;
        try {
            // The two streams are opened separately so that the error message
            // names the file that actually caused the failure.
            try {
                bis = new BufferedInputStream(new FileInputStream(source));
            } catch (FileNotFoundException e) {
                throw new MaltChainedException("The source file '"+source+"' cannot be opened when copying the file. ", e);
            }
            try {
                bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER);
            } catch (FileNotFoundException e) {
                throw new MaltChainedException("The destination file '"+destination+"' cannot be created when coping the file. ", e);
            }
            final byte[] readBuffer = new byte[BUFFER];
            int n;
            while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) {
                bos.write(readBuffer, 0, n);
            }
            // Flush and close explicitly so that a failing final write is reported.
            bos.flush();
            bos.close();
        } catch (IOException e) {
            throw new MaltChainedException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e);
        } finally {
            closeQuietly(bos);
            closeQuietly(bis);
        }
    }

    /**
     * @param s the string to parse for the double value
     * @return the parsed, finite double value
     * @throws IllegalArgumentException if s is empty or represents NaN or Infinity
     * @throws NumberFormatException see {@link Double#parseDouble(String)}
     */
    public static double atof(String s) {
        if (s == null || s.length() < 1) throw new IllegalArgumentException("Can't convert empty string to double");
        double d = Double.parseDouble(s);
        if (Double.isNaN(d) || Double.isInfinite(d)) {
            throw new IllegalArgumentException("NaN or Infinity in input: " + s);
        }
        return d;
    }

    /**
     * @param s the string to parse for the integer value; one leading '+' is accepted
     * @return the parsed integer value
     * @throws IllegalArgumentException if s is empty
     * @throws NumberFormatException see {@link Integer#parseInt(String)}; also
     *         thrown when the leading '+' is followed by another sign
     */
    public static int atoi(String s) throws NumberFormatException {
        if (s == null || s.length() < 1) throw new IllegalArgumentException("Can't convert empty string to integer");
        // Integer.parseInt doesn't accept '+' prefixed strings
        if (s.charAt(0) == '+') {
            final String unsigned = s.substring(1);
            // Without this check "+-5" would be parsed as -5 after the '+' is stripped.
            if (unsigned.length() > 0 && (unsigned.charAt(0) == '+' || unsigned.charAt(0) == '-')) {
                throw new NumberFormatException("For input string: \"" + s + "\"");
            }
            s = unsigned;
        }
        return Integer.parseInt(s);
    }

    /**
     * Closes the resource and deliberately ignores anything thrown while
     * closing. Intended for clean-up code where a close failure must not mask
     * the original error.
     *
     * @param c the resource to close, may be <b>null</b>
     */
    public static void closeQuietly(Closeable c) {
        if (c == null) return;
        try {
            c.close();
        } catch (Throwable ignored) {
            // best effort: a failure to close is intentionally not reported
        }
    }

    /**
     * Copies the array into a new array of the given length, truncating or
     * padding with zeros as needed.
     *
     * @param original the array to copy
     * @param newLength the length of the returned array
     * @return the new array
     */
    public static double[] copyOf(double[] original, int newLength) {
        double[] copy = new double[newLength];
        System.arraycopy(original, 0, copy, 0, Math.min(original.length, newLength));
        return copy;
    }

    /**
     * Copies the array into a new array of the given length, truncating or
     * padding with zeros as needed.
     *
     * @param original the array to copy
     * @param newLength the length of the returned array
     * @return the new array
     */
    public static int[] copyOf(int[] original, int newLength) {
        int[] copy = new int[newLength];
        System.arraycopy(original, 0, copy, 0, Math.min(original.length, newLength));
        return copy;
    }

    /**
     * Compares two arrays element by element with the <code>==</code>
     * operator, so an array containing NaN is never equal to another array,
     * and 0.0 equals -0.0.
     *
     * @param a the first array, may be <b>null</b>
     * @param a2 the second array, may be <b>null</b>
     * @return true if both are <b>null</b>, the same array, or have equal
     *         length and pairwise equal elements
     */
    public static boolean equals(double[] a, double[] a2) {
        if (a == a2) return true;
        if (a == null || a2 == null) return false;
        final int length = a.length;
        if (a2.length != length) return false;
        for (int i = 0; i < length; i++) {
            if (a[i] != a2[i]) return false;
        }
        return true;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment