Skip to content

Instantly share code, notes, and snippets.

@EdgeCaseBerg
Created January 30, 2017 21:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EdgeCaseBerg/fe01af79bdfd3fef7836066d5c7940ed to your computer and use it in GitHub Desktop.
Save EdgeCaseBerg/fe01af79bdfd3fef7836066d5c7940ed to your computer and use it in GitHub Desktop.
Example of how to clean HTML-encoded characters (e.g. `&amp;`, `&quot;`) out of JSON string values in Scala
// sbt build settings for the example below (presumably the gist's build.sbt — confirm).
scalaVersion := "2.11.7"

// JSON parsing/printing (play-json) and HTML entity unescaping (commons-lang3).
libraryDependencies ++= Seq(
  "com.typesafe.play" %% "play-json" % "2.5.9",
  "org.apache.commons" % "commons-lang3" % "3.4"
)
import play.api.libs.json._
import play.api.data.validation.ValidationError
import scala.io.Source
import org.apache.commons.lang3.StringEscapeUtils
import java.nio.file.{Paths, Files}
import java.nio.charset.StandardCharsets
/**
  * Command-line utility that un-escapes HTML entities found in the string
  * values of JSON files.
  *
  * Each command-line argument is treated as the path of a JSON file; the
  * cleaned, pretty-printed result is written next to it as
  * `<path>.cleaned.json`.
  */
object example {

  /**
    * Recursively walks a JSON tree and replaces every string value with its
    * HTML-unescaped form (via `StringEscapeUtils.unescapeHtml4`).
    *
    * Object keys are left untouched; only values are rewritten. Any value that
    * is not an object, array or string is returned as-is.
    *
    * @param jsValue the JSON tree to clean
    * @return a tree with the same structure whose string values are unescaped
    */
  def cleanJsValue(jsValue: JsValue): JsValue =
    jsValue match {
      case JsObject(fields) =>
        val updatedFields = fields.map { case (k, v) => k -> cleanJsValue(v) }
        JsObject(updatedFields)
      case JsArray(list) => JsArray(list.map(cleanJsValue))
      case JsString(s)   => JsString(StringEscapeUtils.unescapeHtml4(s))
      case j             => j
    }

  /**
    * Cleans every file named on the command line.
    *
    * @param args paths of the JSON files to process
    */
  def main(args: Array[String]): Unit =
    // foreach rather than map: the loop is run purely for its side effects.
    args.foreach { fileName =>
      // Read the file's raw bytes directly. Going through scala.io.Source
      // decodes to chars with the platform default codec, and truncating those
      // chars back to bytes corrupts every non-ASCII character; it also left
      // the Source (and its file handle) unclosed.
      val rawBytes = Files.readAllBytes(Paths.get(fileName))
      val cleanedJson = cleanJsValue(Json.parse(rawBytes))
      Files.write(
        Paths.get(fileName + ".cleaned.json"),
        Json.prettyPrint(cleanedJson).getBytes(StandardCharsets.UTF_8)
      )
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment