Skip to content

Instantly share code, notes, and snippets.

@zaneli
Created September 8, 2017 15:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zaneli/7588e81a262e32625a2ae7cc126d6b2d to your computer and use it in GitHub Desktop.
Save zaneli/7588e81a262e32625a2ae7cc126d6b2d to your computer and use it in GitHub Desktop.
Scala関西Summit 2017 LT
# sbt version setting (presumably the contents of project/build.properties — confirm).
sbt.version=1.0.1
// Build settings: project coordinates, Scala version and library dependencies.
organization := "com.zaneli"
name := "zaneli-parser"
// Scala version the sources are compiled with; `%%` below appends its binary version to artifact names.
scalaVersion := "2.12.3"
libraryDependencies ++= Seq(
// Provides scala.util.parsing.combinator.RegexParsers.
"org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.6",
// specs2 is scoped to the Test configuration only.
"org.specs2" %% "specs2-core" % "3.9.5" % Test
)
package com.zaneli.parser
import java.net.{URI, URISyntaxException, URLEncoder}
import scala.io.Codec
import scala.util.control.Exception
import scala.util.parsing.combinator.RegexParsers
/**
 * Repairs URI strings that `java.net.URI` rejects by percent-encoding the
 * offending pieces.
 *
 * The grammar walks the input left to right and rewrites:
 *  - runs of `%uXXXX` escapes (four hex digits each, read as UTF-16 code units)
 *    into UTF-8 percent-escapes,
 *  - a literal `[` or `]` into its percent-escape,
 *  - a `%` that is not followed by two hex digits into `%25`.
 *
 * Well-formed `%XX` escapes and every other character are passed through
 * unchanged. One trailing line break (`\n` or `\r\n`) is accepted and dropped;
 * a line break anywhere else makes the parse fail.
 */
object URIParser extends RegexParsers {

  // Treat whitespace as ordinary input instead of letting RegexParsers skip it.
  override val skipWhitespace: Boolean = false

  /** A single `%uXXXX` escape, yielded as the UTF-16 code unit it denotes. */
  private[this] val utf16Unit: Parser[Char] =
    "%u" ~> """[a-fA-F0-9]{4}""".r ^^ { hex => Integer.parseInt(hex, 16).toChar }

  /**
   * One or more adjacent `%uXXXX` escapes, percent-encoded as a whole.
   *
   * Encoding the run together (rather than one code unit at a time) keeps a
   * surrogate pair such as `%uD83D%uDE00` intact; encoded separately, each
   * lone surrogate would be replaced by `?` by `URLEncoder`.
   */
  private[this] val utf16Run: Parser[String] =
    rep1(utf16Unit) ^^ { units => encode(units.mkString) }

  /** A literal square bracket, replaced by its percent-escape. */
  private[this] val bracket: Parser[String] =
    (literal("[") | literal("]")) ^^ encode

  /** A valid `%XX` escape is kept as is; any other `%` is itself escaped. */
  private[this] val percent: Parser[String] =
    """%[a-fA-F0-9]{2}""".r | ("%" ^^ encode)

  /** Any single character other than a line terminator, passed through untouched. */
  private[this] val anyChar: Parser[String] = ".".r

  private[this] val lineBreak = opt("\r") ~ "\n"

  /** The whole input: repaired pieces followed by an optional final line break. */
  private[this] val document: Parser[List[String]] =
    (utf16Run | bracket | percent | anyChar).* <~ opt(lineBreak)

  private[this] def encode(s: String): String =
    URLEncoder.encode(s, Codec.UTF8.name)

  /**
   * Builds a `URI` from `input`, repairing it first if necessary.
   *
   * The input is tried verbatim; only when `java.net.URI` rejects it is the
   * text run through [[parse]] and tried again.
   *
   * @param input the raw URI text
   * @return `Right(uri)` on success; `Left(message)` with either the parser's
   *         error or the `URISyntaxException` message of the repaired text
   */
  def toURI(input: String): Either[String, URI] =
    eitherURI(input).left.flatMap { _ =>
      parse(input).flatMap(repaired => eitherURI(repaired).left.map(_.getMessage))
    }

  /**
   * Rewrites `input` according to the grammar described on this object.
   *
   * @param input the raw URI text
   * @return `Right(repaired)` with the rewritten text, or `Left(message)`
   *         carrying the parser error and the line/column where it occurred
   */
  def parse(input: String): Either[String, String] =
    parseAll(document, input) match {
      case Success(pieces, _) =>
        Right(pieces.mkString)
      case NoSuccess(message, rest) =>
        Left(s"$message on line ${rest.pos.line} on column ${rest.pos.column}")
    }

  /** Wraps the `URI` constructor, capturing only `URISyntaxException`. */
  private[this] def eitherURI(input: String): Either[Throwable, URI] =
    Exception.catching(classOf[URISyntaxException]).either(new URI(input))
}
package com.zaneli.parser
import java.net.{URI, URISyntaxException, URLDecoder, URLEncoder}
import org.specs2.mutable.Specification
import scala.io.Codec
/**
 * Checks `URIParser.parse` against URLs that `java.net.URI` accepts as is and
 * against URLs it rejects (raw brackets, stray `%`, `%uXXXX` escapes).
 */
class URIParserSpec extends Specification {

  "URIParser" should {

    // A plain URL is already valid and must come back unchanged.
    "通常のURLをパースできる" in {
      val plain = "https://example.com"
      new URI(plain).toASCIIString must_== plain
      URIParser.parse(plain) must beRight(plain)
    }

    // A correctly percent-encoded URL must survive the parser.
    "正しくパーセントエンコードされたURLをパースできる" in {
      val encoded = "https://example.com/" + utf8Encode("すから")
      new URI(encoded).toASCIIString must_== encoded
      URIParser.parse(encoded).map(utf8Decode) must beRight("https://example.com/すから")
    }

    // Unencoded square brackets are rejected by java.net.URI but repaired by the parser.
    "[]がエンコードされていないURLをパースできる" in {
      val bracketed = """https://example.com/[%E3%81%99%E3%81%8B%E3%82%89]"""
      new URI(bracketed) must throwA[URISyntaxException]
      repairedAndDecoded(bracketed) must beRight("https://example.com/[すから]")
    }

    // A bare '%' (not followed by two hex digits) is repaired.
    "エンコードされていない%が含まれるURLをパースできる" in {
      val strayPercent = "https://example.com/%%E3%81%99%E3%81%8B%E3%82%89%"
      new URI(strayPercent) must throwA[URISyntaxException]
      repairedAndDecoded(strayPercent) must beRight("https://example.com/%すから%")
    }

    // %uXXXX escapes are converted to regular percent-escapes.
    "%u....の形式で16進文字コードが含まれるURLをパースできる" in {
      val unicodeEscaped = "https://example.com/%u3059%u304B%u3089"
      new URI(unicodeEscaped) must throwA[URISyntaxException]
      repairedAndDecoded(unicodeEscaped) must beRight("https://example.com/すから")
    }
  }

  /** Parses `raw`, feeds the result through `java.net.URI`, and URL-decodes its ASCII form. */
  private[this] def repairedAndDecoded(raw: String): Either[String, String] =
    URIParser.parse(raw).map { repaired =>
      utf8Decode(new URI(repaired).toASCIIString)
    }

  private[this] def utf8Encode(text: String): String =
    URLEncoder.encode(text, Codec.UTF8.name)

  private[this] def utf8Decode(text: String): String =
    URLDecoder.decode(text, Codec.UTF8.name)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment