Skip to content

Instantly share code, notes, and snippets.

@fwip
Created November 16, 2018 03:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fwip/2f1a9c72d2b93ec5502562d85e8d8e1c to your computer and use it in GitHub Desktop.
Save fwip/2f1a9c72d2b93ec5502562d85e8d8e1c to your computer and use it in GitHub Desktop.
package locale
import (
"bufio"
"fmt"
"io"
"regexp"
"strconv"
"strings"
"unicode/utf8"
)
// wordRegex splits a locale-definition line into words. A word is one of:
//   - a run of characters containing no whitespace and no quote characters,
//   - a double-quoted string, or
//   - a single-quoted string.
//
// Quoted words are returned by FindAllString with their surrounding quotes
// still attached. An opening quote that is never closed does not match on its
// own; the text that follows it is matched as ordinary unquoted words.
//
// TODO: escaped quote characters inside a quoted string are not handled.
var wordRegex = regexp.MustCompile(`[^\s"']+|"([^"]*)"|'([^']*)'`)
// Parse transforms a text definition of a locale into a Def.
//
// The input is consumed line by line:
//   - A line whose first rune is the comment character (initially '#') is
//     skipped.
//   - A line whose last rune is the escape character (initially '\\') is
//     joined with the following line; the escape character is removed and the
//     continuation line is trimmed of surrounding whitespace.
//   - The joined line is split into words with wordRegex. Lines with no words
//     are skipped, a single word must be a category name (LC_CTYPE,
//     LC_COLLATE, LC_MONETARY, LC_NUMERIC, LC_TIME, LC_MESSAGES), and every
//     other line must be exactly "keyword value".
//   - "comment_char" and "escape_char" are only accepted outside a category.
//   - "END <category>" closes the currently open category.
//
// On any failure Parse returns a zero Def and an error whose message starts
// with "line N:". Internally, helper closures abort parsing by panicking; the
// deferred handler converts every panic (including unexpected runtime panics)
// into the returned error, so Parse itself does not panic.
func Parse(in io.Reader) (def Def, err error) {
	s := bufio.NewScanner(in)
	commentChar := '#'
	escapeChar := '\\'
	lineNum := 0
	curCategory := ""

	// parseFailure wraps errors raised on purpose by die, so the recover
	// handler can tell them apart from unexpected runtime panics and avoid
	// prefixing the line number a second time.
	type parseFailure struct{ err error }

	// Convert any panic into a returned error.
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		def = Def{}
		if pf, ok := r.(parseFailure); ok {
			err = pf.err
			return
		}
		err = fmt.Errorf("line %d: %v", lineNum, r)
	}()

	// die aborts parsing with a formatted message prefixed by the current
	// line number. It never returns normally.
	die := func(format string, args ...interface{}) {
		panic(parseFailure{err: fmt.Errorf("line %d: %s", lineNum, fmt.Sprintf(format, args...))})
	}

	// requireCategory aborts unless the parser is currently inside category.
	requireCategory := func(category, keyword string) {
		if category != curCategory {
			die("keyword %s is only applicable in category %s (current: '%s')", keyword, category, curCategory)
		}
	}

	// requireString strips the surrounding double quotes from val, aborting
	// if they are missing.
	requireString := func(val string) string {
		// TODO: Bug with escaped quote characters.
		if len(val) < 2 || val[0] != '"' || val[len(val)-1] != '"' {
			die("Value '%s' needs to be a double-quoted string", val)
		}
		return val[1 : len(val)-1]
	}

	// requireInt parses val as a decimal integer, aborting on failure.
	requireInt := func(val string) int {
		n, convErr := strconv.Atoi(val)
		if convErr != nil {
			die("%s is not an integer", val)
		}
		return n
	}

	// requireChar strips the surrounding angle brackets from val, aborting
	// if they are missing. The length check keeps an empty list element from
	// triggering an index-out-of-range panic.
	requireChar := func(val string) string {
		if len(val) < 2 || val[0] != '<' || val[len(val)-1] != '>' {
			die("character %s must be surrounded by angle brackets", val)
		}
		return val[1 : len(val)-1]
	}

	// requireCharList parses a semicolon-separated list of <char> items.
	requireCharList := func(val string) []string {
		items := strings.Split(val, ";")
		out := make([]string, 0, len(items))
		for _, item := range items {
			out = append(out, requireChar(item))
		}
		return out
	}

	// requireCharPair parses "(<a>,<b>)" into its two character names.
	requireCharPair := func(pair string) (first, second string) {
		if len(pair) < 2 || pair[0] != '(' || pair[len(pair)-1] != ')' {
			die("character pair '%s' not in parentheses", pair)
		}
		parts := strings.Split(pair[1:len(pair)-1], ",")
		if len(parts) != 2 {
			die("Need 2 elements in pair '%s'", pair)
		}
		return requireChar(parts[0]), requireChar(parts[1])
	}

	for s.Scan() {
		lineNum++
		line := s.Text()

		// Discard comment lines.
		firstRune, _ := utf8.DecodeRuneInString(line)
		if firstRune == commentChar {
			continue
		}

		// Join continuation lines: while the line ends with the escape
		// character, drop that character and append the next input line.
		for {
			lastRune, size := utf8.DecodeLastRuneInString(line)
			if size == 0 || lastRune != escapeChar {
				break
			}
			if !s.Scan() {
				die("Line continuation at end of file")
			}
			lineNum++
			line = line[:len(line)-size] + strings.TrimSpace(s.Text())
		}

		// TODO: Find more robust way to split
		words := wordRegex.FindAllString(line, -1)
		if len(words) == 0 {
			continue
		}
		if len(words) == 1 {
			// Only category declarations can have only 1 word.
			switch words[0] {
			case "LC_CTYPE", "LC_COLLATE", "LC_MONETARY", "LC_NUMERIC", "LC_TIME", "LC_MESSAGES":
				curCategory = words[0]
			default:
				die("Unrecognized category name %s", words[0])
			}
			continue
		}
		// No line can have more than 2 words.
		if len(words) > 2 {
			return Def{}, fmt.Errorf("line %d contains too many words: %s", lineNum, words)
		}

		keyword, val := words[0], words[1]
		switch keyword {
		// Header keywords: only valid before any category is opened.
		case "comment_char":
			if curCategory != "" {
				return Def{}, fmt.Errorf("line %d: unexpected %s during category %s", lineNum, keyword, curCategory)
			}
			commentChar, _ = utf8.DecodeRuneInString(val)
		case "escape_char":
			if curCategory != "" {
				return Def{}, fmt.Errorf("line %d: unexpected %s during category %s", lineNum, keyword, curCategory)
			}
			escapeChar, _ = utf8.DecodeRuneInString(val)

		// End of a category block.
		case "END":
			if val != curCategory {
				return Def{}, fmt.Errorf("line %d: tried to end category %s during %s", lineNum, val, curCategory)
			}
			curCategory = ""

		// Ctype values.
		case "charclass":
			requireCategory("LC_CTYPE", keyword)
			// def starts as the zero value, so the map must be created
			// before the first write.
			// NOTE(review): map type inferred from usage in this function;
			// confirm against the Def declaration.
			if def.ctype.other == nil {
				def.ctype.other = make(map[string][]string)
			}
			for _, cc := range strings.Split(val, ";") {
				// Register with a non-nil empty slice so the default case
				// below can recognise the class name as declared.
				if _, ok := def.ctype.other[cc]; !ok {
					def.ctype.other[cc] = make([]string, 0)
				}
			}
		case "upper":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.upper = requireCharList(val)
		case "lower":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.lower = requireCharList(val)
		case "alpha":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.alpha = requireCharList(val)
		case "digit":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.digit = requireCharList(val)
		case "alnum":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.alnum = requireCharList(val)
		case "space":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.space = requireCharList(val)
		case "cntrl":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.cntrl = requireCharList(val)
		case "punct":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.punct = requireCharList(val)
		case "graph":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.graph = requireCharList(val)
		case "print":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.print = requireCharList(val)
		case "xdigit":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.xdigit = requireCharList(val)
		case "blank":
			requireCategory("LC_CTYPE", keyword)
			def.ctype.blank = requireCharList(val)
		case "toupper":
			requireCategory("LC_CTYPE", keyword)
			// NOTE(review): map type inferred from usage; confirm against Def.
			if def.ctype.toupper == nil {
				def.ctype.toupper = make(map[string]string)
			}
			for _, pair := range strings.Split(val, ";") {
				lower, upper := requireCharPair(pair)
				def.ctype.toupper[lower] = upper
			}
		case "tolower":
			requireCategory("LC_CTYPE", keyword)
			// NOTE(review): map type inferred from usage; confirm against Def.
			if def.ctype.tolower == nil {
				def.ctype.tolower = make(map[string]string)
			}
			for _, pair := range strings.Split(val, ";") {
				upper, lower := requireCharPair(pair)
				def.ctype.tolower[upper] = lower
			}

		// Monetary values.
		case "int_curr_symbol":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.intCurrSymbol = requireString(val)
		case "currency_symbol":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.currencySymbol = requireString(val)
		case "mon_decimal_point":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.monDecimalPoint = requireString(val)
		case "mon_thousands_sep":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.monThousandsSep = requireString(val)
		case "mon_grouping":
			requireCategory("LC_MONETARY", keyword)
			for _, group := range strings.Split(val, ";") {
				def.monetary.monGrouping = append(def.monetary.monGrouping, requireInt(group))
			}
		case "positive_sign":
			requireCategory("LC_MONETARY", keyword)
			// NOTE(review): stored verbatim, including any surrounding
			// quotes, unlike the other string-valued monetary keywords.
			// Confirm whether requireString was intended here.
			def.monetary.positiveSign = val
		case "negative_sign":
			requireCategory("LC_MONETARY", keyword)
			// NOTE(review): stored verbatim; see positive_sign.
			def.monetary.negativeSign = val
		case "int_frac_digits":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.intFracDigits = requireInt(val)
		case "frac_digits":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.fracDigits = requireInt(val)
		case "p_cs_precedes":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.pCsPrecedes = requireInt(val)
		case "p_sep_by_space":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.pSepBySpace = requireInt(val)
		case "n_cs_precedes":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.nCsPrecedes = requireInt(val)
		case "n_sep_by_space":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.nSepBySpace = requireInt(val)
		case "p_sign_posn":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.pSignPosn = requireInt(val)
		case "n_sign_posn":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.nSignPosn = requireInt(val)
		case "int_p_cs_precedes":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.intPcsPrecedes = requireInt(val)
		case "int_p_sep_by_space":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.intPsepBySpace = requireInt(val)
		case "int_n_cs_precedes":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.intNcsPrecedes = requireInt(val)
		case "int_n_sep_by_space":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.intNsepBySpace = requireInt(val)
		case "int_p_sign_posn":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.intPsignPosn = requireInt(val)
		case "int_n_sign_posn":
			requireCategory("LC_MONETARY", keyword)
			def.monetary.intNsignPosn = requireInt(val)

		default:
			// A keyword that was previously declared via "charclass"
			// collects its values; anything else is rejected.
			switch {
			case curCategory == "LC_CTYPE" && def.ctype.other[keyword] != nil:
				def.ctype.other[keyword] = append(def.ctype.other[keyword], val)
			default:
				die("misc error: %s", line)
			}
		}
	}

	// Scan returns false on both EOF and read errors; surface the latter
	// instead of treating truncated input as a complete definition.
	if scanErr := s.Err(); scanErr != nil {
		return Def{}, fmt.Errorf("line %d: %v", lineNum, scanErr)
	}
	if curCategory != "" {
		die("Category %s never ended", curCategory)
	}
	return def, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment