Skip to content

Instantly share code, notes, and snippets.

@k8si
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save k8si/159be8027acf99c74b46 to your computer and use it in GitHub Desktop.
Save k8si/159be8027acf99c74b46 to your computer and use it in GitHub Desktop.
import java.io._
import cc.factorie.app.nlp.Document
/** Demonstrates round-tripping a factorie `Document` through Java serialization. */
object TestStuff {

  /**
   * Builds a small document, attaches a `Thing` attribute to the document and to
   * each of its tokens, writes the document to `test.ser` (a path relative to the
   * working directory), reads it back, and prints the string, attributes and
   * tokens both before and after the round trip.
   *
   * The file `test.ser` is left on disk afterwards.
   */
  def serializeStuff(): Unit = {

    /** Minimal serializable attribute whose `toString` makes it visible when attrs are printed. */
    class Thing(s: String) extends Serializable {
      override def toString: String = s"Thing($s)"
    }

    /**
     * Writes `doc` to `filename` using Java object serialization.
     *
     * Both streams are closed even when constructing the object stream or writing
     * the object throws, so no file handle is leaked on failure.
     *
     * @param doc      the document to serialize
     * @param filename path of the file to create or overwrite
     * @return `filename`, unchanged, for convenient chaining
     */
    def serializeDoc(doc: Document, filename: String): String = {
      val fileOut = new FileOutputStream(filename)
      try {
        // The ObjectOutputStream constructor itself writes a stream header and may throw,
        // which is why the file stream is guarded by its own outer finally.
        val out = new ObjectOutputStream(fileOut)
        try {
          out.writeObject(doc)
        } finally {
          out.close()
        }
      } finally {
        fileOut.close()
      }
      filename
    }

    /**
     * Reads a `Document` previously written by `serializeDoc`.
     *
     * Both streams are closed even when reading or casting fails.
     *
     * @param filename path of the serialized file
     * @return the deserialized document
     */
    def deserializeDoc(filename: String): Document = {
      val fileIn = new FileInputStream(filename)
      try {
        val in = new ObjectInputStream(fileIn)
        try {
          // A ClassCastException here means the file did not contain a Document.
          in.readObject().asInstanceOf[Document]
        } finally {
          in.close()
        }
      } finally {
        fileIn.close()
      }
    }

    val str = "Barack Obama spoke yesterday"
    val doc = new Document(str)
    // Presumably tokenizes `doc` in place; the recorded output below shows 4 tokens.
    cc.factorie.app.nlp.segment.DeterministicTokenizer.process(doc)
    doc.attr += new Thing("thing")

    println(s"original string: ${doc.string}")
    println(s"original attr: ${doc.attr}")
    println(s"original # tokens: ${doc.tokens.size}")
    println("original tokens:")
    doc.tokens.foreach { t =>
      // Attach the attribute before printing so it shows up in this line's output.
      t.attr += new Thing("token-thing")
      println(s"${t.string} attr: ${t.attr}")
    }
    /* Output (as recorded by the original author)
     *
     * original string: Barack Obama spoke yesterday
     * original attr: Thing(thing)
     * original # tokens: 4
     * original tokens:
     * Barack attr: Thing(token-thing)
     * Obama attr: Thing(token-thing)
     * spoke attr: Thing(token-thing)
     * yesterday attr: Thing(token-thing)
     *
     */

    println("")
    println("serializing...")
    val fname = serializeDoc(doc, "test.ser")
    println("deserializing...")
    val newDoc = deserializeDoc(fname)
    println("")

    println(s"deserialized string: ${newDoc.string}")
    println(s"deserialized attr: ${newDoc.attr}")
    println(s"deserialized # tokens: ${newDoc.tokens.size}")
    println("deserialized tokens:")
    newDoc.tokens.foreach { t => println(s"${t.string} attr: ${t.attr}") }
    /* Output (WITHOUT custom {read/write}Object on Document and Token)
     *
     * deserialized string: Barack Obama spoke yesterday
     * deserialized attr:
     * deserialized # tokens: 4
     * deserialized tokens:
     * Barack attr:
     * Obama attr:
     * spoke attr:
     * yesterday attr:
     *
     */
    /* Output (WITH custom {read/write}Object on Document and Token)
     *
     * deserialized string: Barack Obama spoke yesterday
     * deserialized attr: Thing(thing)
     * deserialized # tokens: 4
     * deserialized tokens:
     * Barack attr: Thing(token-thing)
     * Obama attr: Thing(token-thing)
     * spoke attr: Thing(token-thing)
     * yesterday attr: Thing(token-thing)
     *
     */
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment