Created
September 8, 2017 15:32
-
-
Save zaneli/7588e81a262e32625a2ae7cc126d6b2d to your computer and use it in GitHub Desktop.
Scala関西Summit 2017 LT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sbt.version=1.0.1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Project coordinates.
organization := "com.zaneli"

name := "zaneli-parser"

// Scala compiler version the project is built with.
scalaVersion := "2.12.3"

// Parser-combinator library that provides RegexParsers.
libraryDependencies += "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.6"

// specs2 is only needed on the test classpath.
libraryDependencies += "org.specs2" %% "specs2-core" % "3.9.5" % Test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.zaneli.parser | |
import java.net.{URI, URISyntaxException, URLEncoder} | |
import scala.io.Codec | |
import scala.util.control.Exception | |
import scala.util.parsing.combinator.RegexParsers | |
/**
 * Lenient pre-processor for URL strings that `java.net.URI` rejects.
 *
 * The parser walks the input character by character and percent-encodes the
 * pieces handled here:
 *
 *  - `%uXXXX` sequences (four hex digits, a UTF-16 code unit each),
 *  - unescaped `[` and `]`,
 *  - a `%` that is not followed by two hex digits.
 *
 * Well-formed `%XX` escapes and every other character are passed through
 * unchanged. A single trailing line break (`\n` or `\r\n`) is accepted and dropped.
 */
object URIParser extends RegexParsers {

  // Whitespace is significant inside a URL, so the combinators must not skip it.
  override val skipWhitespace: Boolean = false

  // One or more consecutive `%uXXXX` units. They are decoded as a group before
  // being URL-encoded so that a surrogate pair (e.g. `%uD83D%uDE00`) is encoded
  // as one code point; encoding each half separately would yield `%3F` ("?").
  private[this] val HexChars: Parser[String] =
    rep1("%u" ~> """[a-fA-F0-9]{4}""".r) ^^ { units =>
      encode(units.map(unit => Integer.parseInt(unit, 16).toChar).mkString)
    }

  private[this] val StartBracket: Parser[String] = "[" ^^ encode

  private[this] val EndBracket: Parser[String] = "]" ^^ encode

  // A valid `%XX` escape is kept verbatim; a bare `%` becomes `%25`.
  private[this] val Percent: Parser[String] =
    """%[a-fA-F0-9]{2}""".r | ("%" ^^ encode)

  // Any single character except a line terminator.
  private[this] val AnyChar: Parser[String] = ".".r

  private[this] val NewLine = opt("\r") ~ "\n"

  // Order matters: `%u....` must be tried before the generic `%` handling.
  private[this] val Escaped: Parser[List[String]] =
    rep(HexChars | StartBracket | EndBracket | Percent | AnyChar) <~ opt(NewLine)

  /** URL-encodes `s` as UTF-8. */
  private[this] def encode(s: String): String =
    URLEncoder.encode(s, Codec.UTF8.name)

  /**
   * Builds a [[java.net.URI]] from `input`.
   *
   * The input is first tried as-is; only if `java.net.URI` rejects it is the
   * string repaired with [[parse]] and tried again.
   *
   * @param input the URL string to convert
   * @return `Right(uri)` on success, otherwise `Left` with either the parser's
   *         error message or the `URISyntaxException` message of the repaired string
   */
  def toURI(input: String): Either[String, URI] =
    eitherURI(input).left.flatMap { _ =>
      parse(input).flatMap(repaired => eitherURI(repaired).left.map(_.getMessage))
    }

  /**
   * Percent-encodes the problematic parts of `input` (see the object documentation).
   *
   * @param input the URL string to repair
   * @return `Right(repaired)` on success, or `Left(message)` with the parser
   *         error and its line/column position
   */
  def parse(input: String): Either[String, String] = parseAll(Escaped, input) match {
    case Success(parts, _) => Right(parts.mkString)
    case NoSuccess(err, next) =>
      Left(s"$err on line ${next.pos.line} on column ${next.pos.column}")
  }

  /** Wraps `new URI(input)`, turning a `URISyntaxException` into a `Left`. */
  private[this] def eitherURI(input: String): Either[Throwable, URI] =
    Exception.catching(classOf[URISyntaxException]).either(new URI(input))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.zaneli.parser | |
import java.net.{URI, URISyntaxException, URLDecoder, URLEncoder} | |
import org.specs2.mutable.Specification | |
import scala.io.Codec | |
/**
 * Specification for `URIParser.parse`.
 *
 * Each example first pins how `java.net.URI` itself treats the raw input
 * (accepts it unchanged, or throws `URISyntaxException`) and then checks the
 * string produced by the parser.
 */
class URIParserSpec extends Specification {

  "URIParser" should {

    "通常のURLをパースできる" in {
      val plain = "https://example.com"
      new URI(plain).toASCIIString must_== plain
      URIParser.parse(plain) must beRight(plain)
    }

    "正しくパーセントエンコードされたURLをパースできる" in {
      val escaped = s"https://example.com/${encode("すから")}"
      new URI(escaped).toASCIIString must_== escaped
      val decoded = URIParser.parse(escaped).map(decode)
      decoded must beRight("https://example.com/すから")
    }

    "[]がエンコードされていないURLをパースできる" in {
      val withBrackets = """https://example.com/[%E3%81%99%E3%81%8B%E3%82%89]"""
      new URI(withBrackets) must throwA[URISyntaxException]
      parseThenDecode(withBrackets) must beRight("https://example.com/[すから]")
    }

    "エンコードされていない%が含まれるURLをパースできる" in {
      val withBarePercent = "https://example.com/%%E3%81%99%E3%81%8B%E3%82%89%"
      new URI(withBarePercent) must throwA[URISyntaxException]
      parseThenDecode(withBarePercent) must beRight("https://example.com/%すから%")
    }

    "%u....の形式で16進文字コードが含まれるURLをパースできる" in {
      val withHexUnits = "https://example.com/%u3059%u304B%u3089"
      new URI(withHexUnits) must throwA[URISyntaxException]
      parseThenDecode(withHexUnits) must beRight("https://example.com/すから")
    }
  }

  /**
   * Runs the parser, feeds its output through `java.net.URI` (which must
   * accept it) and URL-decodes the ASCII form for comparison.
   */
  private[this] def parseThenDecode(raw: String): Either[String, String] =
    URIParser.parse(raw).map { repaired =>
      decode(new URI(repaired).toASCIIString)
    }

  /** URL-encodes `s` as UTF-8. */
  private[this] def encode(s: String): String =
    URLEncoder.encode(s, Codec.UTF8.name)

  /** URL-decodes `s` as UTF-8. */
  private[this] def decode(s: String): String =
    URLDecoder.decode(s, Codec.UTF8.name)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment