Created
September 6, 2012 23:24
-
-
Save erikfrey/3661269 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.maltparser.core.config; | |
import java.io.File; | |
import java.net.MalformedURLException; | |
import java.net.URL; | |
import org.maltparser.core.exception.MaltChainedException; | |
import org.maltparser.core.flow.FlowChartInstance; | |
import org.maltparser.core.flow.item.ChartItem; | |
import org.maltparser.core.flow.spec.ChartItemSpecification; | |
import org.maltparser.core.helper.SystemInfo; | |
import org.maltparser.core.helper.SystemLogger; | |
import org.maltparser.core.options.OptionManager; | |
/** | |
* | |
* | |
* @author Johan Hall | |
*/ | |
public class ConfigDirChartItem extends ChartItem { | |
private String idName; | |
private String taskName; | |
private String optionFileName; | |
private URL configDirURL; | |
private String configDirName; | |
private ConfigurationDir configDir; | |
private String outCharSet; | |
private String inCharSet; | |
public ConfigDirChartItem() {} | |
public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException { | |
super.initialize(flowChartinstance, chartItemSpecification); | |
for (String key : chartItemSpecification.getChartItemAttributes().keySet()) { | |
if (key.equals("id")) { | |
idName = chartItemSpecification.getChartItemAttributes().get(key); | |
} else if (key.equals("task")) { | |
taskName = chartItemSpecification.getChartItemAttributes().get(key); | |
} | |
} | |
if (idName == null) { | |
idName = getChartElement("configdir").getAttributes().get("id").getDefaultValue(); | |
} else if (taskName == null) { | |
taskName = getChartElement("configdir").getAttributes().get("task").getDefaultValue(); | |
} | |
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "url") != null && OptionManager.instance().getOptionValue(getOptionContainerIndex(),"config", "url").toString().length() > 0) { | |
try { | |
configDirURL = new URL(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "url").toString()); | |
} catch (MalformedURLException e) { | |
throw new ConfigurationException("The URL '"+OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "url").toString()+"' is malformed. ", e); | |
} | |
} | |
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "name").toString().endsWith(".mco")) { | |
int index = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "name").toString().lastIndexOf('.'); | |
configDirName = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "name").toString().substring(0,index); | |
} else { | |
configDirName = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "config", "name").toString(); | |
} | |
try { | |
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "system", "option_file") != null) { | |
optionFileName = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "system", "option_file").toString(); | |
} | |
} catch (ConfigurationException e) { | |
throw new ConfigurationException("The option file '"+optionFileName+"' could not be copied. ",e); | |
} | |
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "output", "charset") != null) { | |
outCharSet = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "output", "charset").toString(); | |
} else { | |
outCharSet = "UTF-8"; | |
} | |
if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), "input", "charset") != null) { | |
inCharSet = OptionManager.instance().getOptionValue(getOptionContainerIndex(), "input", "charset").toString(); | |
} else { | |
inCharSet = "UTF-8"; | |
} | |
configDir = (ConfigurationDir)flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName); | |
if (configDir == null) { | |
if (configDirURL != null) { | |
configDir = new ConfigurationDir(configDirURL); | |
} else { | |
configDir = new ConfigurationDir(configDirName, idName, getOptionContainerIndex()); | |
} | |
flowChartinstance.addFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName, configDir); | |
} | |
if (taskName.equals("versioning")) { | |
configDir.versioning(); | |
} else if (taskName.equals("loadsavedoptions")) { | |
configDir.initCreatedByMaltParserVersionFromInfoFile(); | |
if (configDir.getCreatedByMaltParserVersion() == null) { | |
SystemLogger.logger().warn("Couln't determine which version of MaltParser that created the parser model: " + configDirName+ ".mco\n MaltParser will terminate\n"); | |
System.exit(1); | |
} else if (!configDir.getCreatedByMaltParserVersion().startsWith(SystemInfo.getVersion())) { | |
SystemLogger.logger().error("The parser model '"+ configDirName+ ".mco' is created by MaltParser "+configDir.getCreatedByMaltParserVersion()+".\n"); | |
SystemLogger.logger().error("You have re-train the parser model to be able to parse with current version of MaltParser."); | |
System.exit(1); | |
} | |
OptionManager.instance().loadOptions(getOptionContainerIndex(), configDir.getInputStreamReaderFromConfigFile("savedoptions.sop")); | |
configDir.initDataFormat(); | |
} else if (taskName.equals("createdir")) { | |
configDir.setCreatedByMaltParserVersion(SystemInfo.getVersion()); | |
configDir.createConfigDirectory(); | |
if (optionFileName != null && optionFileName.length() > 0) { | |
configDir.copyToConfig(new File(optionFileName)); | |
} | |
configDir.initDataFormat(); | |
} | |
} | |
public int preprocess(int signal) throws MaltChainedException { | |
if (taskName.equals("unpack")) { | |
SystemLogger.logger().info("Unpacking the parser model '"+ configDirName+ ".mco' ...\n"); | |
configDir.unpackConfigFile(); | |
} else if (taskName.equals("info")) { | |
configDir.echoInfoFile(); | |
} else if (taskName.equals("loadsymboltables")) { | |
configDir.getSymbolTables().load(configDir.getInputStreamReaderFromConfigFileEntry("symboltables.sym",inCharSet)); | |
} | |
return signal; | |
} | |
public int process(int signal) throws MaltChainedException { | |
return signal; | |
} | |
public int postprocess(int signal) throws MaltChainedException { | |
if (taskName.equals("createfile")) { | |
OptionManager.instance().saveOptions(getOptionContainerIndex(), configDir.getOutputStreamWriter("savedoptions.sop")); | |
configDir.createConfigFile(); | |
} else if (taskName.equals("deletedir")) { | |
configDir.terminate(); | |
configDir.deleteConfigDirectory(); | |
} else if (taskName.equals("savesymboltables")) { | |
configDir.getSymbolTables().save(configDir.getOutputStreamWriter("symboltables.sym", outCharSet)); | |
} | |
return signal; | |
} | |
public void terminate() throws MaltChainedException { } | |
public boolean equals(Object obj) { | |
if (this == obj) | |
return true; | |
if (obj == null) | |
return false; | |
if (getClass() != obj.getClass()) | |
return false; | |
return obj.toString().equals(this.toString()); | |
} | |
public int hashCode() { | |
return 217 + (null == toString() ? 0 : toString().hashCode()); | |
} | |
public String toString() { | |
final StringBuilder sb = new StringBuilder(); | |
sb.append(" configdir "); | |
sb.append("id:");sb.append(idName); | |
sb.append(' '); | |
sb.append("task:");sb.append(taskName); | |
return sb.toString(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.maltparser.core.helper; | |
import java.io.File; | |
import java.io.IOException; | |
import java.net.URL; | |
import java.net.URLDecoder; | |
import java.util.jar.Attributes; | |
import java.util.jar.JarFile; | |
import java.util.jar.Manifest; | |
import java.util.regex.Pattern; | |
import org.maltparser.core.exception.MaltChainedException; | |
import org.maltparser.core.options.OptionManager; | |
/** | |
* | |
* | |
* @author Johan Hall | |
*/ | |
public class SystemInfo { | |
private static SystemInfo uniqueInstance = new SystemInfo(); | |
private static String version; | |
private static String buildDate; | |
private static Attributes manifestAttributes; | |
private static File maltJarPath; | |
private SystemInfo() { | |
Pattern MALTJAR = Pattern.compile("^.*malt[^" + File.separator | |
+ "]*\\.jar$"); | |
try { | |
getManifestInfo(); | |
String[] jarfiles = System.getProperty("java.class.path").split( | |
File.pathSeparator); | |
for (int i = 0; i < jarfiles.length; i++) { | |
if (MALTJAR.matcher(jarfiles[i]).matches()) { | |
maltJarPath = new File(new File(jarfiles[i]) | |
.getAbsolutePath()); | |
} | |
} | |
} catch (MaltChainedException e) { | |
if (SystemLogger.logger().isDebugEnabled()) { | |
SystemLogger.logger().debug("", e); | |
} else { | |
SystemLogger.logger().error(e.getMessageChain()); | |
} | |
System.exit(1); | |
} | |
} | |
/** | |
* Returns a reference to the single instance. | |
*/ | |
public static SystemInfo instance() { | |
return uniqueInstance; | |
} | |
/** | |
* Returns the application header | |
* | |
* @return the application header | |
*/ | |
public static String header() { | |
StringBuilder sb = new StringBuilder(); | |
sb | |
.append("-----------------------------------------------------------------------------\n" | |
+ " MaltParser "+ version + " \n" | |
+ "-----------------------------------------------------------------------------\n" | |
+ " MALT (Models and Algorithms for Language Technology) Group \n" | |
+ " Vaxjo University and Uppsala University \n" | |
+ " Sweden \n" | |
+ "-----------------------------------------------------------------------------\n"); | |
return sb.toString(); | |
} | |
/** | |
* Returns a short version of the help | |
* | |
* @return a short version of the help | |
*/ | |
public static String shortHelp() { | |
StringBuilder sb = new StringBuilder(); | |
sb.append("\n" | |
+ "Usage: \n" | |
+ " java -jar malt.jar -f <path to option file> <options>\n" | |
+ " java -jar malt.jar -h for more help and options\n\n" | |
+ OptionManager.instance().getOptionDescriptions() | |
.toStringOptionGroup("system") | |
+ "Documentation: docs/index.html\n"); | |
return sb.toString(); | |
} | |
/** | |
* Returns a set of attributes present in the jar manifest file | |
* | |
* @return a set of attributes present in the jar manifest file | |
*/ | |
public static Attributes getManifestAttributes() { | |
return manifestAttributes; | |
} | |
/** | |
* Returns the version number as string | |
* | |
* @return the version number as string | |
*/ | |
public static String getVersion() { | |
return version; | |
} | |
/** | |
* Returns the build date | |
* | |
* @return the build date | |
*/ | |
public static String getBuildDate() { | |
return buildDate; | |
} | |
public static File getMaltJarPath() { | |
return maltJarPath; | |
} | |
/** | |
* Loads the manifest attributes from the manifest in the jar-file | |
* | |
* @throws MaltChainedException | |
*/ | |
private void getManifestInfo() throws MaltChainedException { | |
version = "1.5"; | |
buildDate = "meow"; | |
try { | |
URL codeBase = SystemInfo.class.getProtectionDomain() | |
.getCodeSource().getLocation(); | |
if (codeBase != null && codeBase.getPath().endsWith(".jar")) { | |
JarFile jarfile = new JarFile(URLDecoder.decode(codeBase | |
.getPath(), java.nio.charset.Charset.defaultCharset() | |
.name())); | |
Manifest manifest = jarfile.getManifest(); | |
Attributes manifestAttributes = manifest.getMainAttributes(); | |
version = manifestAttributes.getValue("Implementation-Version"); | |
buildDate = manifestAttributes.getValue("Build-Date"); | |
} | |
} catch (IOException e) { | |
version = ""; | |
buildDate = "Not available"; | |
e.printStackTrace(); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.maltparser.core.helper; | |
import java.io.BufferedInputStream; | |
import java.io.BufferedOutputStream; | |
import java.io.Closeable; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileNotFoundException; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.net.MalformedURLException; | |
import java.net.URL; | |
import org.apache.log4j.Logger; | |
import org.maltparser.core.exception.MaltChainedException; | |
import org.maltparser.core.plugin.Plugin; | |
import org.maltparser.core.plugin.PluginLoader; | |
/** | |
* | |
* | |
* @author Johan Hall | |
*/ | |
public class Util { | |
private static final int BUFFER = 4096; | |
private static final char AMP_CHAR = '&'; | |
private static final char LT_CHAR = '<'; | |
private static final char GT_CHAR = '>'; | |
private static final char QUOT_CHAR = '"'; | |
private static final char APOS_CHAR = '\''; | |
public static String xmlEscape(String str) { | |
boolean needEscape = false; | |
char c; | |
for (int i = 0; i < str.length(); i++) { | |
c = str.charAt(i); | |
if (c == AMP_CHAR || c == LT_CHAR || c == GT_CHAR || c == QUOT_CHAR || c == APOS_CHAR) { | |
needEscape = true; | |
break; | |
} | |
} | |
if (!needEscape) { | |
return str; | |
} | |
final StringBuilder sb = new StringBuilder(); | |
for (int i = 0; i < str.length(); i++) { | |
c = str.charAt(i); | |
if (str.charAt(i) == AMP_CHAR) { | |
sb.append("&"); | |
} else if ( str.charAt(i) == LT_CHAR) { | |
sb.append("<"); | |
} else if (str.charAt(i) == GT_CHAR) { | |
sb.append(">"); | |
} else if (str.charAt(i) == QUOT_CHAR) { | |
sb.append("""); | |
} else if (str.charAt(i) == APOS_CHAR) { | |
sb.append("'"); | |
} else { | |
sb.append(c); | |
} | |
} | |
return sb.toString(); | |
} | |
/** | |
* Search for a file according the following priority: | |
* <ol> | |
* <li>The local file system | |
* <li>Specified as an URL (starting with http:, file:, ftp: or jar: | |
* <li>MaltParser distribution file (malt.jar) | |
* <li>MaltParser plugins | |
* </ol> | |
* | |
* If the file string is found, an URL object is returned, otherwise <b>null</b> | |
* | |
* @param fileString the file string to convert into an URL. | |
* @return an URL object, if the file string is found, otherwise <b>null</b> | |
* @throws MaltChainedException | |
*/ | |
public static URL findURL(String fileString) throws MaltChainedException { | |
File specFile = new File(fileString); | |
try { | |
if (specFile.exists()) { | |
// found the file in the file system | |
return new URL("file:///"+specFile.getAbsolutePath()); | |
} else if (fileString.startsWith("http:") || fileString.startsWith("file:") || fileString.startsWith("ftp:") || fileString.startsWith("jar:")) { | |
// the input string is an URL string starting with http, file, ftp or jar | |
return new URL(fileString); | |
} else { | |
return findURLinJars(fileString); | |
} | |
} catch (MalformedURLException e) { | |
throw new MaltChainedException("Malformed URL: "+fileString, e); | |
} | |
} | |
public static URL findURLinJars(String fileString) throws MaltChainedException { | |
try { | |
// search in malt.jar and its plugins | |
if (Util.class.getResource(fileString) != null) { | |
// found the input string in the malt.jar file | |
return Util.class.getResource(fileString); | |
} else { | |
for (Plugin plugin : PluginLoader.instance()) { | |
URL url = null; | |
if (!fileString.startsWith("/")) { | |
url = new URL("jar:"+plugin.getUrl() + "!/" + fileString); | |
} else { | |
url = new URL("jar:"+plugin.getUrl() + "!" + fileString); | |
} | |
try { | |
InputStream is = url.openStream(); | |
is.close(); | |
} catch (IOException e) { | |
continue; | |
} | |
// found the input string in one of the plugins | |
return url; | |
} | |
// could not convert the input string into an URL | |
return null; | |
} | |
} catch (MalformedURLException e) { | |
throw new MaltChainedException("Malformed URL: "+fileString, e); | |
} | |
} | |
public static int simpleTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) { | |
logger.info("."); | |
int tic = inTic + 1; | |
if (tic >= nTicxRow) { | |
ticInfo(logger, startTime, subject); | |
tic = 0; | |
} | |
return tic; | |
} | |
public static void startTicer(Logger logger, long startTime, int nTicxRow, int subject) { | |
logger.info("."); | |
for (int i = 1; i <= nTicxRow; i++) { | |
logger.info(" "); | |
} | |
ticInfo(logger, startTime, subject); | |
} | |
public static void endTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) { | |
for (int i = inTic; i <= nTicxRow; i++) { | |
logger.info(" "); | |
} | |
ticInfo(logger, startTime, subject); | |
} | |
private static void ticInfo(Logger logger, long startTime, int subject) { | |
logger.info("\t"); | |
int a = 1000000; | |
if (subject != 0) { | |
while (subject/a == 0) { | |
logger.info(" "); | |
a /= 10; | |
} | |
} else { | |
logger.info(" "); | |
} | |
logger.info(subject); | |
logger.info("\t"); | |
long time = (System.currentTimeMillis()-startTime)/1000; | |
a = 1000000; | |
if (time != 0) { | |
while (time/a == 0 ) { | |
logger.info(" "); | |
a /= 10; | |
} | |
logger.info(time); | |
logger.info("s"); | |
} else { | |
logger.info(" 0s"); | |
} | |
logger.info("\t"); | |
long memory = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1000000; | |
a = 1000000; | |
if (memory != 0) { | |
while (memory/a == 0 ) { | |
logger.info(" "); | |
a /= 10; | |
} | |
logger.info(memory); | |
logger.info("MB\n"); | |
} else { | |
logger.info(" 0MB\n"); | |
} | |
} | |
public static void copyfile(String source, String destination) throws MaltChainedException { | |
try { | |
byte[] readBuffer = new byte[BUFFER]; | |
BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source)); | |
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); | |
int n = 0; | |
while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { | |
bos.write(readBuffer, 0, n); | |
} | |
bos.flush(); | |
bos.close(); | |
bis.close(); | |
} catch (FileNotFoundException e) { | |
throw new MaltChainedException("The destination file '"+destination+"' cannot be created when coping the file. ", e); | |
} catch (IOException e) { | |
throw new MaltChainedException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e); | |
} | |
} | |
/** | |
* @param s the string to parse for the double value | |
* @throws IllegalArgumentException if s is empty or represents NaN or Infinity | |
* @throws NumberFormatException see {@link Double#parseDouble(String)} | |
*/ | |
public static double atof(String s) { | |
if (s == null || s.length() < 1) throw new IllegalArgumentException("Can't convert empty string to integer"); | |
double d = Double.parseDouble(s); | |
if (Double.isNaN(d) || Double.isInfinite(d)) { | |
throw new IllegalArgumentException("NaN or Infinity in input: " + s); | |
} | |
return (d); | |
} | |
/** | |
* @param s the string to parse for the integer value | |
* @throws IllegalArgumentException if s is empty | |
* @throws NumberFormatException see {@link Integer#parseInt(String)} | |
*/ | |
public static int atoi(String s) throws NumberFormatException { | |
if (s == null || s.length() < 1) throw new IllegalArgumentException("Can't convert empty string to integer"); | |
// Integer.parseInt doesn't accept '+' prefixed strings | |
if (s.charAt(0) == '+') s = s.substring(1); | |
return Integer.parseInt(s); | |
} | |
public static void closeQuietly(Closeable c) { | |
if (c == null) return; | |
try { | |
c.close(); | |
} catch (Throwable t) {} | |
} | |
public static double[] copyOf(double[] original, int newLength) { | |
double[] copy = new double[newLength]; | |
System.arraycopy(original, 0, copy, 0, Math.min(original.length, newLength)); | |
return copy; | |
} | |
public static int[] copyOf(int[] original, int newLength) { | |
int[] copy = new int[newLength]; | |
System.arraycopy(original, 0, copy, 0, Math.min(original.length, newLength)); | |
return copy; | |
} | |
public static boolean equals(double[] a, double[] a2) { | |
if (a == a2) return true; | |
if (a == null || a2 == null) return false; | |
int length = a.length; | |
if (a2.length != length) return false; | |
for (int i = 0; i < length; i++) | |
if (a[i] != a2[i]) return false; | |
return true; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment