Skip to content

Instantly share code, notes, and snippets.

@htmldoug
Created November 28, 2018 17:45
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save htmldoug/7b84b2febe67056c00199cea069003e3 to your computer and use it in GitHub Desktop.
Save htmldoug/7b84b2febe67056c00199cea069003e3 to your computer and use it in GitHub Desktop.
import java.io.OutputStream
import com.fasterxml.jackson.core.JsonFactory
import com.fasterxml.jackson.core.json.async.NonBlockingJsonParser
import upickle.core._
import scala.annotation.switch
import scala.collection.mutable
/**
 * Uses the fast jackson [[NonBlockingJsonParser]] to parse the input bytes,
 * and traverse the structure with a uJson [[Visitor]].
 *
 * Bytes written to this stream are fed to the parser chunk by chunk, and every token the
 * parser can produce from the bytes seen so far is forwarded to the visitors right away.
 * Call [[close]] once all input has been written: it signals end-of-input to the parser
 * and drains the remaining tokens.
 *
 * Note that the value produced for a top-level JSON value is handed to a dummy root
 * context whose `visitValue` discards it, so this class is only useful with visitors
 * that act through side effects.
 *
 * @param rootVisitor visitor used for top-level JSON values
 * @tparam J result type produced by the visitors
 */
class JacksonVisitorOutputStream[J](
  rootVisitor: Visitor[_, J]
) extends OutputStream {

  private val parser: NonBlockingJsonParser = new JsonFactory()
    .createNonBlockingByteArrayParser()
    .asInstanceOf[NonBlockingJsonParser]

  private type StackObjArrVisitor = ObjArrVisitor[_, J]

  /** Contexts of the objects/arrays currently open; the top is the innermost one. */
  private val stack: mutable.ArrayStack[StackObjArrVisitor] = {
    // Create a dummy root stack element that returns the rootVisitor.
    // This is simpler than special-casing the rootVisitor.
    val rootArrVisitor: StackObjArrVisitor = new ArrVisitor[Any, Nothing] {
      override def subVisitor: Visitor[Nothing, Any] = rootVisitor
      override def visitValue(v: Any, index: Int): Unit = ()
      override def visitEnd(index: Int): Nothing = {
        throw new IllegalStateException("programming error: illegal call to dummy ArrVisitor")
      }
    }
    val s = new mutable.ArrayStack[StackObjArrVisitor]()
    s.push(rootArrVisitor)
    s
  }

  /** Visitor for the next value inside the innermost open context. */
  private def facade: Visitor[_, J] = stack.top.subVisitor.asInstanceOf[Visitor[_, J]]

  /** The innermost open context, which receives completed values. */
  private def ctxt: ObjArrVisitor[Any, J] = stack.top.narrow

  /** Writes a single byte by delegating to the array-based overload. */
  override def write(b: Int): Unit = {
    // unoptimized and typically unused.
    write(Array(b.toByte))
  }

  /**
   * Feeds `len` bytes of `bytes`, starting at `idx`, to the parser and forwards every
   * token that becomes available to the visitors.
   */
  override def write(bytes: Array[Byte], idx: Int, len: Int): Unit = {
    // feedInput takes (buffer, start offset, END offset), not (buffer, offset, length),
    // so the length has to be converted into an end offset.
    parser.feedInput(bytes, idx, idx + len)
    digest()
  }

  /**
   * Pulls tokens from the parser until it has none left (null) or needs more input
   * (NOT_AVAILABLE), dispatching each one to the visitor of the current context.
   */
  private def digest(): Unit = {
    import com.fasterxml.jackson.core.JsonTokenId._
    while (true) {
      val token = parser.nextToken()
      if (token == null) return
      // Doug: @switch performs noticeably better in my benchmarks. https://stackoverflow.com/a/28133066
      (token.id(): @switch) match {
        case ID_NOT_AVAILABLE =>
          return
        case ID_START_OBJECT =>
          stack.push(facade.visitObject(-1, index))
        case ID_END_OBJECT | ID_END_ARRAY =>
          // Finish the innermost context, then report its result to the enclosing one.
          val value = stack.pop().visitEnd(index)
          ctxt.visitValue(value, index)
        case ID_START_ARRAY =>
          stack.push(facade.visitArray(-1, index))
        case ID_FIELD_NAME =>
          val objVisitor = ctxt.asInstanceOf[ObjVisitor[Any, J]]
          objVisitor.visitKeyValue(objVisitor.visitKey(index).visitString(parser.getCurrentName, index))
        case ID_STRING =>
          ctxt.visitValue(facade.visitString(charSequence, index), index)
        case ID_NUMBER_INT =>
          // jackson emits "1e6" and "3.14" as FLOAT, so we're safe to pass -1 here.
          ctxt.visitValue(facade.visitFloat64StringParts(charSequence, -1, -1, index), index)
        case ID_NUMBER_FLOAT =>
          ctxt.visitValue(facade.visitFloat64String(parser.getValueAsString, index), index)
        case ID_TRUE =>
          ctxt.visitValue(facade.visitTrue(index), index)
        case ID_FALSE =>
          ctxt.visitValue(facade.visitFalse(index), index)
        case ID_NULL =>
          ctxt.visitValue(facade.visitNull(index), index)
        case ID_EMBEDDED_OBJECT =>
          // Ignored. NOTE(review): presumably never produced for plain JSON input -- confirm.
        case _ =>
          throw new IllegalStateException("Unexpected token")
      }
    }
  }

  /** Non-copying view of the current token's text inside the parser's own buffer. */
  private def charSequence: CharSequence =
    new TextBufferCharSequence(parser.getTextCharacters, parser.getTextOffset, parser.getTextLength)

  /**
   * [[parser]]'s character position over the entire input.
   * Used for debugging. The parser reports a Long; it is truncated to Int here.
   */
  private def index: Int = {
    parser.getTokenCharacterOffset.toInt
  }

  /** No-op: every write is handed to the parser immediately. */
  override def flush(): Unit = ()

  /**
   * Signals end-of-input to the parser, drains the remaining tokens, and closes the
   * parser, even if draining fails.
   */
  override def close(): Unit = {
    try {
      // flush parser's internal buffer.
      parser.endOfInput()
      digest()
    } finally {
      // Closing lets Jackson release the parser's internal resources.
      parser.close()
    }
  }
}
/**
 * Reference to a chunk of text in a VERY MUTABLE BUFFER.
 *
 * Contents are guaranteed to be stable for as long the receiving method is on the stack.
 * After the receiving method returns control, the contents may change.
 *
 * Cheaper than allocating a String when one is not needed.
 * When a String *is* required with [[toString]], the JIT can completely eliminate [[TextBufferCharSequence]] allocations.
 *
 * Accesses are checked against the logical window (`off` until `off + length`), so a
 * caller can never read characters of the shared buffer that lie outside this chunk.
 *
 * @param buf    backing character buffer (shared, not copied)
 * @param off    index in `buf` of the first character of this sequence
 * @param length number of characters in this sequence
 */
class TextBufferCharSequence(buf: Array[Char], off: Int, override val length: Int) extends CharSequence {

  /**
   * Returns the character at `index`, relative to the start of this sequence.
   *
   * @throws IndexOutOfBoundsException if `index` is negative or not less than `length`
   */
  override def charAt(index: Int): Char = {
    // The array's own bounds check only protects the whole buffer, not this window.
    if (index < 0 || index >= length) {
      throw new IndexOutOfBoundsException(s"index $index out of bounds for length $length")
    }
    buf(off + index)
  }

  /**
   * Returns a view of the characters from `start` (inclusive) to `end` (exclusive).
   * The view shares the same backing buffer; nothing is copied.
   *
   * @throws IndexOutOfBoundsException if `start` or `end` is negative, `end` is greater
   *                                   than `length`, or `start` is greater than `end`
   */
  override def subSequence(start: Int, end: Int): CharSequence = {
    if (start < 0 || end > length || start > end) {
      throw new IndexOutOfBoundsException(s"start $start, end $end, length $length")
    }
    new TextBufferCharSequence(buf, off + start, end - start)
  }

  /** Copies the current contents of this chunk into a new, stable String. */
  override def toString: String = new String(buf, off, length)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment