Last active
September 25, 2023 03:17
-
-
Save er2/af8172a88d7f209e8cdedd90d20fa969 to your computer and use it in GitHub Desktop.
Scala solution to the LeetCode "UTF-8 Validation" problem - https://leetcode.com/problems/utf-8-validation/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.annotation.tailrec | |
/** Checks whether a sequence of integers forms a valid UTF-8 byte stream
  * (LeetCode "UTF-8 Validation").
  *
  * Every integer stands for one byte. The patterns declared below are 8 bits wide,
  * so any bits above the lowest 8 of an input integer are ignored when matching.
  */
object Solution {

  /** Array entry point: copies `data` into a `List` and delegates to the list overload.
    *
    * @param data one integer per byte
    * @return `true` when the whole input splits into well-formed 1- to 4-byte characters
    */
  def validUtf8(data: Array[Int]): Boolean = validUtf8(data.toList)

  /** Consumes one character (1 to 4 bytes) from the front of `data` and recurses on the rest.
    *
    * @param data one integer per byte
    * @return `true` for empty input or when every character is well formed; `false` as soon
    *         as the front of the list matches none of the four byte layouts (this includes
    *         a multi-byte character cut short by the end of the input)
    */
  @tailrec
  def validUtf8(data: List[Int]): Boolean = data match {
    case Nil => true
    case n1 :: ns if ASCII_SEQ(n1) => validUtf8(ns)
    case n1 :: n2 :: ns if TWO_BYTE(n1, n2) => validUtf8(ns)
    case n1 :: n2 :: n3 :: ns if THREE_BYTE(n1, n2, n3) => validUtf8(ns)
    case n1 :: n2 :: n3 :: n4 :: ns if FOUR_BYTE(n1, n2, n3, n4) => validUtf8(ns)
    case _ => false
  }

  /** A bit pattern for a single byte, written as a binary string in which `0` and `1`
    * are fixed bits and `X` is a "don't care" bit.
    *
    * @param p the pattern text, e.g. `"110XXXXX"`
    */
  final case class Pattern(p: String) {

    /** Has a 1 in every fixed (`0`/`1`) position of `p` and a 0 in every `X` position. */
    val mask: Int = Integer.parseInt(p.replace('0', '1').replace('X', '0'), 2)

    /** The value the fixed bits must have; `X` positions are zeroed. */
    val maskTarget: Int = Integer.parseInt(p.replace('X', '0'), 2)

    /** Returns `true` when the fixed bits of `i` equal the fixed bits of this pattern. */
    def ~~(i: Int): Boolean = (mask & i) == maskTarget
  }

  val ASCII_PATTERN = Pattern("0XXXXXXX")
  val TRAILING = Pattern("10XXXXXX")
  val FIRST_OF_TWO = Pattern("110XXXXX")
  val FIRST_OF_THREE = Pattern("1110XXXX")
  val FIRST_OF_FOUR = Pattern("11110XXX")

  /** An ordered group of byte patterns describing one encoded character.
    *
    * @param patterns the pattern each byte must satisfy, in order
    */
  class Sequence(patterns: Pattern*) {

    /** Returns `true` only when exactly as many bytes as patterns are supplied and each
      * byte satisfies the pattern at the same position.
      *
      * The explicit length check matters because `zip` truncates to the shorter side:
      * without it, a partial or empty byte group would be reported as a match.
      */
    def apply(ns: Int*): Boolean =
      ns.length == patterns.length &&
        patterns.zip(ns).forall { case (pattern, byte) => pattern ~~ byte }
  }

  // `new` (instead of relying on Scala 3 creator applications) keeps this compiling on
  // Scala 2.13 as well.
  val ASCII_SEQ = new Sequence(ASCII_PATTERN)
  val TWO_BYTE = new Sequence(FIRST_OF_TWO, TRAILING)
  val THREE_BYTE = new Sequence(FIRST_OF_THREE, TRAILING, TRAILING)
  val FOUR_BYTE = new Sequence(FIRST_OF_FOUR, TRAILING, TRAILING, TRAILING)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment