Created
November 16, 2018 03:50
-
-
Save fwip/2f1a9c72d2b93ec5502562d85e8d8e1c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package locale | |
import ( | |
"bufio" | |
"fmt" | |
"io" | |
"regexp" | |
"strconv" | |
"strings" | |
"unicode/utf8" | |
) | |
// wordRegex splits a logical line into words. A word is one of:
//   - a run of characters that are neither whitespace nor quote characters,
//   - a double-quoted string, or
//   - a single-quoted string.
//
// Quoted words keep their surrounding quotes in the matched text.
//
// TODO: escaped quote characters inside a quoted string are not handled, and
// an unterminated quote character is silently skipped rather than reported.
var wordRegex = regexp.MustCompile(`[^\s"']+|"([^"]*)"|'([^']*)'`)
// Parse transforms a text-definition of a locale into a Def | |
func Parse(in io.Reader) (def Def, err error) { | |
s := bufio.NewScanner(in) | |
commentChar := '#' | |
escapeChar := '\\' | |
lineNum := 0 | |
curCategory := "" | |
// Catch any errors | |
defer func() { | |
if r := recover(); r != nil { | |
def = Def{} | |
err = fmt.Errorf("Line %d: %s", lineNum, r) | |
} | |
}() | |
// A closure to simplify error handling | |
die := func(format string, args ...interface{}) { | |
panic(fmt.Errorf("line %d: %s", lineNum, fmt.Sprintf(format, args))) | |
} | |
// More closures to make assertions and parse input | |
requireCategory := func(category, keyword string) { | |
if category != curCategory { | |
die("keyword %s is only applicable in category %s (current: '%s')", keyword, category, curCategory) | |
} | |
} | |
requireString := func(val string) string { | |
// TODO: Bug with escaped quote characters. | |
if val[0] != '"' || val[len(val)-1] != '"' { | |
die("Value '%s' needs to be a double-quoted string", val) | |
} | |
return val[1 : len(val)-1] | |
} | |
requireInt := func(val string) int { | |
n, err := strconv.Atoi(val) | |
if err != nil { | |
die("%s is not an integer", val) | |
} | |
return n | |
} | |
requireChar := func(val string) string { | |
if val[0] != '<' || val[len(val)-1] != '>' { | |
die("character %s must be surrounded by angle brackets", val) | |
} | |
return val[1 : len(val)-1] | |
} | |
requireCharList := func(val string) []string { | |
var out []string | |
for _, s := range strings.Split(val, ";") { | |
out = append(out, requireChar(s)) | |
} | |
return out | |
} | |
requireCharPair := func(pair string) (first, second string) { | |
if pair[0] != '(' || pair[len(pair)-1] != ')' { | |
die("character pair '%s' not in parentheses", pair) | |
} | |
parts := strings.Split(pair[1:len(pair)-1], ",") | |
if len(parts) != 2 { | |
die("Need 2 elements in pair '%s'", pair) | |
} | |
return requireChar(parts[0]), requireChar(parts[1]) | |
} | |
for s.Scan() { | |
lineNum++ | |
line := s.Text() | |
// Discard comment lines | |
firstRune, _ := utf8.DecodeRuneInString(line) | |
if firstRune == commentChar { | |
continue | |
} | |
// Join newlines if they end with the escapeChar | |
// TODO: This is very inefficient | |
for { | |
var lastRune rune | |
for _, r := range line { | |
lastRune = r | |
} | |
if lastRune != escapeChar { | |
break | |
} | |
if lastRune == escapeChar { | |
if !s.Scan() { | |
die("Line continuation at end of file") | |
return Def{}, fmt.Errorf("line continuation on line %d was not followed by another line", lineNum) | |
} | |
line += strings.TrimSpace(s.Text()) | |
lineNum++ | |
} | |
} | |
// TODO: Find more robust way to split | |
words := wordRegex.FindAllString(line, -1) | |
if len(words) == 0 { | |
continue | |
} | |
if len(words) == 1 { | |
// Only category declarations can have only 1 word | |
switch words[0] { | |
case "LC_CTYPE", "LC_COLLATE", "LC_MONETARY", "LC_NUMERIC", "LC_TIME", "LC_MESSAGES": | |
curCategory = words[0] | |
default: | |
die("Unrecognized category name %s", words[0]) | |
} | |
continue | |
} | |
// No line can have more than 2 words | |
if len(words) > 2 { | |
return Def{}, fmt.Errorf("line %d contains too many words: %s", lineNum, words) | |
} | |
// Logic! | |
keyword, val := words[0], words[1] | |
switch keyword { | |
// Header blocks | |
case "comment_char": | |
if curCategory != "" { | |
return Def{}, fmt.Errorf("line %d: unexpected %s during category %s", lineNum, keyword, curCategory) | |
} | |
commentChar, _ = utf8.DecodeRuneInString(words[1]) | |
case "escape_char": | |
if curCategory != "" { | |
return Def{}, fmt.Errorf("line %d: unexpected %s during category %s", lineNum, keyword, curCategory) | |
} | |
escapeChar, _ = utf8.DecodeRuneInString(words[1]) | |
// End block | |
case "END": | |
if val != curCategory { | |
return Def{}, fmt.Errorf("line %d: tried to end category %s during %s", lineNum, val, curCategory) | |
} | |
curCategory = "" | |
// Ctype values | |
case "charclass": | |
requireCategory("LC_CTYPE", keyword) | |
for _, cc := range strings.Split(val, ";") { | |
if _, in := def.ctype.other[cc]; !in { | |
def.ctype.other[cc] = make([]string, 0) | |
} | |
} | |
case "upper": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.upper = requireCharList(val) | |
case "lower": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.lower = requireCharList(val) | |
case "alpha": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.alpha = requireCharList(val) | |
case "digit": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.digit = requireCharList(val) | |
case "alnum": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.alnum = requireCharList(val) | |
case "space": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.space = requireCharList(val) | |
case "cntrl": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.cntrl = requireCharList(val) | |
case "punct": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.punct = requireCharList(val) | |
case "graph": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.graph = requireCharList(val) | |
case "print": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.print = requireCharList(val) | |
case "xdigit": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.xdigit = requireCharList(val) | |
case "blank": | |
requireCategory("LC_CTYPE", keyword) | |
def.ctype.blank = requireCharList(val) | |
case "toupper": | |
for _, pair := range strings.Split(val, ";") { | |
lower, upper := requireCharPair(pair) | |
def.ctype.toupper[lower] = upper | |
} | |
case "tolower": | |
for _, pair := range strings.Split(val, ";") { | |
upper, lower := requireCharPair(pair) | |
def.ctype.tolower[upper] = lower | |
} | |
// Monetary values | |
case "int_curr_symbol": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.intCurrSymbol = requireString(val) | |
case "currency_symbol": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.currencySymbol = requireString(val) | |
case "mon_decimal_point": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.monDecimalPoint = requireString(val) | |
case "mon_thousands_sep": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.monThousandsSep = requireString(val) | |
case "mon_grouping": | |
requireCategory("LC_MONETARY", keyword) | |
for _, s := range strings.Split(val, ";") { | |
def.monetary.monGrouping = append(def.monetary.monGrouping, requireInt(s)) | |
} | |
case "positive_sign": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.positiveSign = val | |
case "negative_sign": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.negativeSign = val | |
case "int_frac_digits": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.intFracDigits = requireInt(val) | |
case "frac_digits": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.fracDigits = requireInt(val) | |
case "p_cs_precedes": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.pCsPrecedes = requireInt(val) | |
case "p_sep_by_space": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.pSepBySpace = requireInt(val) | |
case "n_cs_precedes": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.nCsPrecedes = requireInt(val) | |
case "n_sep_by_space": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.nSepBySpace = requireInt(val) | |
case "p_sign_posn": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.pSignPosn = requireInt(val) | |
case "n_sign_posn": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.nSignPosn = requireInt(val) | |
case "int_p_cs_precedes": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.intPcsPrecedes = requireInt(val) | |
case "int_p_sep_by_space": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.intPsepBySpace = requireInt(val) | |
case "int_n_cs_precedes": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.intNcsPrecedes = requireInt(val) | |
case "int_n_sep_by_space": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.intNsepBySpace = requireInt(val) | |
case "int_p_sign_posn": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.intPsignPosn = requireInt(val) | |
case "int_n_sign_posn": | |
requireCategory("LC_MONETARY", keyword) | |
def.monetary.intNsignPosn = requireInt(val) | |
default: | |
// Handle more complex values | |
switch { | |
case curCategory == "LC_CTYPE" && def.ctype.other[keyword] != nil: | |
def.ctype.other[keyword] = append(def.ctype.other[keyword], val) | |
default: | |
die("misc error: %s", line) | |
} | |
} | |
} | |
if curCategory != "" { | |
die("Category %s never ended", curCategory) | |
} | |
return def, nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment