This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def leftMultiplyAndSumAndMax(t: Tensor1, v: DenseTensor1): (DenseTensor1, Double) = { | |
assert(dim1 == t.dim1, "Dimensions don't match: " + dim1 + " " + t.dim1) | |
val myDim2 = dim2 | |
val newT = v.copy | |
val newArray = newT.asArray | |
var max = Double.MinValue | |
var currentVal = 0.0 | |
t match { | |
case t: DenseTensor => | |
val tArr = t.asArray |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<meta content="Read Featured Article “YesAsia 2011 Bestsellers - Movies” by YumCha! Editorial Team mentioning Donnie Yen,Shawn Yue,Jung Woo Sung,Michelle Yeoh,Jiang Wen,Andy Lau,Kaneshiro Takeshi,Yamazaki Takashi,Benny Chan,Won Bin,Kim Jee Woon,Kimura Takuya,Tsui Hark,Su Chao Pin,Barbie Hsu,Wang Xue Qi,John Woo,Peter Chan, "Villain (DVD) (English Subtitled) (Hong Kong Version)","Let The Bullets Fly (2010) (Blu-ray + DVD) (Hong Kong Version)","Villain (Blu-ray) (English Subtitled) (Hong Kong Version)","The Borrower Arrietty (DVD) (English Subtitled) (2-Disc Edition) (Hong Kong Version)","Shaolin (2011) (Blu-ray) (English Subtitled) (Hong Kong Version)","A Better Tomorrow (2010) (DVD) (First Press Limited Edition) (Korea Version)","Outrage (Blu-ray) (English Subtitled) (Hong Kong Version)","The Housemaid (2010) (DVD) (Single Disc) (Korea Version)","Bruce Lee My Brother (Blu-ray) (Hong Kong Version)","Space Battleship |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ProcessSlotFillingCorpusOpts extends CmdOptions { | |
val dataDirs = new CmdOption[List[String]]("data-dirs", List.empty[String], "FILENAME...", "List of directories containing (only) data files in sgml format.") | |
val dataFiles = new CmdOption[List[String]]("data-files", List.empty[String], "FILENAME...", "List of files in sgml format.") | |
val dataFilesFile = new CmdOption("data-files-file", "", "FILENAME", "File containing a list of paths to data files, one per line.") | |
val reprocess = new CmdOption("reprocess", false, "BOOL", "Whether to re-process documents that we find to be already serialized.") | |
val outputDir = new CmdOption("output-dir", "", "FILENAME", "Directory to which to serialize processed docs") | |
val inputType = new CmdOption("input-type", "filename", "STRING", "Type of the input: filename, docid, document. Document assumes plain text serialized documents, filename is tac corpus filenames, docid is lookup into original corpus by docid.") | |
val retag = new CmdOption("retag-type", "none", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Return a string that captures the generic "shape" of the original word, | |
mapping lowercase alphabetics to 'a', uppercase to 'A', digits to '1', whitespace to ' '. | |
Skip more than 'maxRepetitions' of the same character class. */ | |
def stringShape(word:String, maxRepetitions:Int): String = { | |
val sb = new StringBuffer | |
var i = 0; var c = 'x'; var prevc = 'x'; var repetitions = 0 | |
while (i < word.length) { | |
val char = word(i) | |
if (Character.isUpperCase(char)) c = 'A' | |
else if (Character.isLowerCase(char)) c = 'a' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object ProcessSlotFillingCorpusParallel { | |
def main(args: Array[String]) { | |
implicit val random = new scala.util.Random(0) | |
val opts = new ProcessSlotFillingCorpusParallelOpts | |
opts.parse(args) | |
/* Load data files */ | |
if (opts.dataDirs.wasInvoked && opts.dataFiles.wasInvoked) { | |
println("Please specify either a list of data directories or files but not both.") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package cc.factorie.tutorial | |
import cc.factorie._ | |
import java.io.File | |
import cc.factorie.variable._ | |
import cc.factorie.model.{Parameters, DotTemplateWithStatistics2, DotTemplateWithStatistics1, TemplateModel} | |
import cc.factorie.infer.{BPSummary, BP, IteratedConditionalModes, GibbsSampler} | |
/** A demonstration of training a linear-chain CRF for named entity recognition. | |
Prints various diagnostics suitable to a demo. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def serialize(stream: java.io.OutputStream): Unit = { | |
import CubbieConversions._ | |
val dstream = new java.io.DataOutputStream(new BufferedOutputStream(stream)) | |
domains.foreach{domain => BinarySerializer.serialize(domain.dimensionDomain, dstream)} | |
BinarySerializer.serialize(model, dstream) | |
BinarySerializer.serialize(WordData.ambiguityClasses, dstream) | |
BinarySerializer.serialize(WordData.sureTokens, dstream) | |
BinarySerializer.serialize(WordData.docWordCounts, dstream) | |
// don't have implicits for serializing set, so convert to a map | |
val obsWordsMap = collection.mutable.HashMap[String,String]() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
org.jdom2.input.JDOMParseException: Error on line 1 of document file:/iesl/canvas/ksilvers/eval/data/acl/full-pdfs/E09-1037.pdf: Content is not allowed in prolog. | |
at org.jdom2.input.sax.SAXBuilderEngine.build(SAXBuilderEngine.java:228) | |
at org.jdom2.input.sax.SAXBuilderEngine.build(SAXBuilderEngine.java:277) | |
at org.jdom2.input.sax.SAXBuilderEngine.build(SAXBuilderEngine.java:264) | |
at org.jdom2.input.SAXBuilder.build(SAXBuilder.java:1116) | |
at edu.umass.cs.iesl.rpp.Main$.process(Main.scala:16) | |
at edu.umass.cs.iesl.rpp.BatchMain$$anonfun$main$1.apply(BatchMain.scala:47) | |
at edu.umass.cs.iesl.rpp.BatchMain$$anonfun$main$1.apply(BatchMain.scala:45) | |
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) | |
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
java.lang.IndexOutOfBoundsException: -1 | |
at scala.collection.immutable.Vector.checkRangeConvert(Vector.scala:132) | |
at scala.collection.immutable.Vector.apply(Vector.scala:122) | |
at edu.umass.cs.iesl.xml_annotator.Annotator.pair2Total(Annotator.scala:561) | |
at edu.umass.cs.iesl.xml_annotator.Annotator$$anonfun$28.apply(Annotator.scala:964) | |
at edu.umass.cs.iesl.xml_annotator.Annotator$$anonfun$28.apply(Annotator.scala:962) | |
at scala.collection.immutable.HashMap$HashMap1.filter0(HashMap.scala:218) | |
at scala.collection.immutable.HashMap$HashTrieMap.filter0(HashMap.scala:385) | |
at scala.collection.immutable.HashMap$HashTrieMap.filter0(HashMap.scala:385) | |
at scala.collection.immutable.HashMap.filter(HashMap.scala:70) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# pdf2svg-parallel.sh | |
# | |
# Usage: pdf2svg-parallel.sh /dir/containing/pdfs /output/dir | |
# | |
# You must set the PDF2SVG_ROOT environment variable to a directory containing | |
# iesl-pdf-to-text, e.g. from that directory: | |
# export PDF2SVG_ROOT=`pwd` | |
# |
OlderNewer