Created
November 8, 2013 10:09
-
-
Save acolley/7368951 to your computer and use it in GitHub Desktop.
Compiling this file occasionally makes the compiler segfault.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import sequtils | |
import strutils | |
import tables | |
import typetraits | |
type
  # Exception type raised by the parsing procs in this file.
  EDocoptLanguageError = object of E_Base

  # nimrod's algebraic datatypes are 'case objects'
  # represents option types
  #TOptionKind* = enum
  #  OptString,
  #  OptSwitch
  #TOption* = object
  #  case kind: TOptionKind
  #  of OptString:
  #    valStr: string
  #  of OptSwitch:
  #    valBool: bool

  # NOTE: can also use {.inheritable.} and not have to inherit
  # from TObject to allow inheritance

  # Root of the pattern hierarchy; every pattern node is a ref object.
  TPattern = ref object of TObject

  # A pattern node that holds child patterns.
  TBranchPattern = ref object of TPattern
    children: seq[TPattern]

  # A pattern node without children, carrying a name and a string value.
  TLeafPattern = ref object of TPattern
    name, value: string

  # Leaf kinds.
  TArgument = ref object of TLeafPattern
  TCommand = ref object of TArgument
  TOption = ref object of TLeafPattern
    short, long: string  # short ("-x") and long ("--xyz") spellings
    argcount: int        # set to 0 or 1 by the parsers in this file

  # Branch kinds.
  TRequired = ref object of TBranchPattern
  TOptional = ref object of TBranchPattern
  TOptionsShortcut = ref object of TOptional
  TOneOrMore = ref object of TBranchPattern
  TEither = ref object of TBranchPattern

  # Token stream consumed by the parse* procs via `current` and `move`.
  TTokens = seq[string]
# TODO: add these to std lib? | |
proc isUpper(s: string): bool =
  ## Returns true when `s` contains at least one ASCII upper-case letter and
  ## no ASCII lower-case letter.
  ##
  ## Strings without any cased character ("", "-", "--", "123") are NOT
  ## upper-case; otherwise such tokens would be classified as arguments
  ## instead of commands by the caller.
  result = false
  for ch in s:
    if ch in {'a'..'z'}:
      return false
    if ch in {'A'..'Z'}:
      result = true
proc lstrip(s: string, c=' '): string =
  ## Returns `s` with every leading occurrence of `c` removed.
  ##
  ## An empty string, or a string made up only of `c`, yields "".
  var i = 0
  # Bound the scan by the length so it can never index past the end of `s`.
  while i < len(s) and s[i] == c:
    inc(i)
  result = substr(s, i)
proc lstrip(s: string, cs: set[char]): string =
  ## Returns `s` with every leading character that is a member of `cs`
  ## removed.
  ##
  ## An empty string, or a string made up only of such characters, yields "".
  var i = 0
  # Bound the scan by the length so it can never index past the end of `s`.
  while i < len(s) and s[i] in cs:
    inc(i)
  result = substr(s, i)
proc rstrip(s: string, c=' '): string =
  ## Returns `s` with every trailing occurrence of `c` removed.
  ##
  ## An empty string, or a string made up only of `c`, yields "".
  var i = len(s) - 1
  # Stop at the front of the string instead of indexing position -1.
  while i >= 0 and s[i] == c:
    dec(i)
  result = substr(s, 0, i)
proc rstrip(s: string, cs: set[char]): string =
  ## Returns `s` with every trailing character that is a member of `cs`
  ## removed.
  ##
  ## An empty string, or a string made up only of such characters, yields "".
  var i = len(s) - 1
  # Stop at the front of the string instead of indexing position -1.
  while i >= 0 and s[i] in cs:
    dec(i)
  result = substr(s, 0, i)
# TPattern implementation | |
method `$`(patt: TPattern): string =
  ## Base string form of a pattern: the fixed text "TPattern".
  return "TPattern"
method name(patt: TPattern): string =
  ## Base implementation: a bare pattern has no name, so "" is returned.
  return ""
method value(patt: TPattern): string =
  ## Base implementation: a bare pattern has no value, so "" is returned.
  return ""
method flat(patt: TPattern, types: openarray[string]): seq[TPattern] =
  ## Base implementation: ignores `types` and returns an empty sequence.
  return newSeq[TPattern]()
method match(patt: TPattern, left: seq[TPattern],
             coll: seq[TPattern]=nil): tuple[success: bool,
                                             l, c: seq[TPattern]] =
  ## Base implementation: never matches. Reports failure together with two
  ## empty sequences, regardless of `left` and `coll`.
  result.success = false
  result.l = @[]
  result.c = @[]
# END TPattern implementation | |
# TLeafPattern implementation | |
method name(patt: TLeafPattern): string =
  ## Returns the leaf's `name` field.
  return patt.name
method value(patt: TLeafPattern): string =
  ## Returns the leaf's `value` field.
  return patt.value
method `$`(patt: TLeafPattern): string =
  ## String form of a leaf: its `name` field.
  return patt.name
method flat(patt: TLeafPattern, types: openarray[string]): seq[TPattern] =
  ## Returns a one-element sequence holding this leaf when `types` is empty
  ## or contains the type name of `patt`; otherwise an empty sequence.
  result = @[]
  if len(types) == 0 or name(type(patt)) in types:
    result.add(TPattern(patt))
#method match(patt: TLeafPattern, left: seq[TPattern], coll: seq[TPattern]=nil) = | |
# TODO: finish | |
# tuple[success: bool, l, c: seq[TPattern]] = | |
# var collected = coll | |
# if collected == nil: | |
# collected = @[] | |
# | |
# let (pos, mtch) = patt.single_match(left) | |
# END TLeafPattern implementation | |
# TArgument implementation | |
proc parseArgument(source: string): TArgument =
  ## Builds a `TArgument` from a piece of argument description text.
  ##
  ## The name is the first `<...>` token found in `source`. The value is the
  ## text captured by a `[default: ...]` marker (matched case-insensitively),
  ## or "" when `source` has no such marker.
  ##
  ## Raises `EDocoptLanguageError` when `source` contains no `<...>` token.
  let names = findAll(source, re(r"<\S*?>", {}))
  if len(names) == 0:
    raise newException(EDocoptLanguageError,
                       "no <argument> name found in: " & source)
  var value = ""
  let defaults = findAll(source, re(r"\[default: (.*)\]", {reIgnoreCase}))
  # findAll only returns whole matches, so the first hit is matched again to
  # extract the captured default. Both regexes use the same flags so a marker
  # found by the first is always accepted by the second.
  if len(defaults) > 0:
    if defaults[0] =~ re(r"\[default: (.*)\]", {reIgnoreCase}):
      value = matches[0]
  result = TArgument(name: names[0], value: value)
method singleMatch(patt: TArgument,
                   left: seq[TPattern]): tuple[pos: int, patt: TPattern] =
  ## Finds the first plain argument in `left`.
  ##
  ## Returns its index together with a new `TArgument` that carries this
  ## pattern's name and the found element's value, or `(-1, nil)` when
  ## `left` holds no plain argument.
  var i = 0
  for pattern in left:
    # `of` checks the run-time type (the declared type of `pattern` is always
    # TPattern). TCommand derives from TArgument, so it is excluded
    # explicitly to accept plain arguments only.
    if pattern of TArgument and not (pattern of TCommand):
      return (i, TPattern(TArgument(name: patt.name, value: value(pattern))))
    inc(i)
  return (-1, nil)
# END TArgument implementation | |
# TCommand implementation | |
# TODO: a command is a flag so the value | |
# should be a bool, find a way to represent | |
# this in this statically typed language | |
method singleMatch(patt: TCommand,
                   left: seq[TPattern]): tuple[pos: int, patt: TPattern] =
  ## Checks whether the first plain argument in `left` spells this command.
  ##
  ## Returns that argument's index and a new `TCommand` whose value is
  ## "true" when its value equals this command's name. Returns `(-1, nil)`
  ## when the first plain argument is something else or there is none.
  var i = 0
  for pattern in left:
    # `of` checks the run-time type (the declared type of `pattern` is always
    # TPattern); TCommand instances are skipped so only plain arguments count.
    if pattern of TArgument and not (pattern of TCommand):
      if value(pattern) == patt.name:
        return (i, TPattern(TCommand(name: patt.name, value: "true")))
      else:
        # only the first plain argument may be the command
        break
    inc(i)
  return (-1, nil)
# END TCommand | |
# TOption implementation | |
proc parseOption(optdesc: string): TOption =
  ## Builds a `TOption` from one option description such as
  ## "-o FILE, --output=FILE  Where to write [default: out.txt]".
  ##
  ## Pieces starting with "--" set the long name, pieces starting with "-"
  ## set the short name, and any other piece marks the option as taking one
  ## argument. For options taking an argument the value is the text captured
  ## by a `[default: ...]` marker in the description, "" otherwise.
  var
    short = ""
    long = ""
    argcount = 0
    value = ""
  # The option part and the description are separated by TWO spaces, so a
  # single space inside the option part ("-o FILE") stays with the options.
  # NOTE(review): `partition` is defined outside this view; it is assumed to
  # return (before, separator, after) — confirm.
  var (options, ign, desc) = optdesc.strip().partition("  ")
  options = options.replace(",", " ").replace("=", " ")
  for s in options.split(' '):
    if s == "":
      # adjacent separators (e.g. ", ") produce empty pieces; they must not
      # be mistaken for an argument placeholder
      continue
    if s.startswith("--"):
      long = s
    elif s.startswith("-"):
      short = s
    else:
      argcount = 1
  if argcount > 0:
    var matched = findAll(desc, re(r"\[default: (.*)\]", {reIgnoreCase}))
    if len(matched) > 0:
      # bad hack! because findAll does not return sub matches
      # we have to match against the first returned match
      if matched[0] =~ re(r"\[default: (.*)\]", {reIgnoreCase}):
        value = matches[0]
  result = TOption(short: short, long: long, argcount: argcount, value: value)
method name(opt: TOption): string =
  ## Name identifying the option: its long spelling when that is not empty,
  ## its short spelling otherwise.
  if opt.long == "":
    return opt.short
  return opt.long
method singleMatch(patt: TOption,
                   left: seq[TPattern]): tuple[pos: int, patt: TPattern] =
  ## Finds the first element of `left` whose name equals this option's name.
  ##
  ## Returns its index and the element itself, or `(-1, nil)` when no
  ## element has that name.
  # FIXME: for some reason we can't use call style patt.name()
  # as the compiler complains about needing an identifier
  # so must use name(patt) style
  let wanted = name(patt)
  for idx in countup(0, len(left) - 1):
    let candidate = left[idx]
    if candidate.name() == wanted:
      return (idx, candidate)
  return (-1, nil)
# END TOption implementation | |
# TBranchPattern implementation | |
method flat(patt: TBranchPattern, types: openarray[string]): seq[TPattern] =
  ## Flattens the pattern tree below `patt`.
  ##
  ## When the type name of `patt` is listed in `types` the node itself is the
  ## whole result and its children are not visited; otherwise the result is
  ## the concatenation of `flat(types)` of every child, in order.
  # NOTE(review): `type(patt)` is presumably the declared parameter type
  # (TBranchPattern), not the run-time subtype — confirm before relying on
  # `types` entries such as "TEither".
  if name(type(patt)) in types:
    # stop here: a requested node must not be mixed with its descendants
    return @[TPattern(patt)]
  result = @[]
  for child in patt.children:
    result.add(child.flat(types))
#method fixIdentities(patt: TBranchPattern, uniq: seq[TPattern]) = | |
#method fixRepeatingArguments(patt: TBranchPattern): TBranchPattern = | |
#method fix() | |
# END TBranchPattern implementation | |
# TRequired implementation | |
method match(patt: TRequired, left: seq[TPattern],
             coll: seq[TPattern]=nil): tuple[success: bool,
                                             l, c: seq[TPattern]] =
  ## Matches every child in order; all of them must match.
  ##
  ## Each child is matched against what the previous child left over. On
  ## success returns `(true, remaining, collected)`. As soon as one child
  ## fails, returns `(false, left, coll)` with the inputs untouched (a nil
  ## `coll` is reported as an empty sequence).
  var collected = coll
  if collected == nil:
    collected = @[]
  var
    l = left
    c = collected
  for pattern in patt.children:
    let matched = pattern.match(l, c)
    l = matched[1]
    c = matched[2]
    # a single failing child makes the whole required group fail
    if not matched[0]:
      return (false, left, collected)
  return (true, l, c)
# END TRequired implementation | |
# TOptional implementation | |
method match(patt: TOptional, left: seq[TPattern],
             coll: seq[TPattern]=nil): tuple[success: bool,
                                             l, c: seq[TPattern]] =
  ## Tries every child in order and always reports success.
  ##
  ## Each child is matched against the sequences returned by the previous
  ## child, whether or not that child matched. The final pair of sequences
  ## is returned with `success` set to true.
  var
    remaining = left
    gathered = coll
  if gathered == nil:
    gathered = @[]
  for child in patt.children:
    let outcome = child.match(remaining, gathered)
    remaining = outcome[1]
    gathered = outcome[2]
  return (true, remaining, gathered)
# END TOptional implementation | |
# TOneOrMore implementation | |
method match(patt: TOneOrMore, left: seq[TPattern],
             coll: seq[TPattern]=nil): tuple[success: bool,
                                             l, c: seq[TPattern]] =
  ## Matches the single child repeatedly.
  ##
  ## Matching stops when the child fails or when a round leaves the
  ## remaining sequence unchanged. Succeeds when the child matched at least
  ## once; on failure the original `left` and `coll` are returned (a nil
  ## `coll` is reported as an empty sequence).
  assert(len(patt.children) == 1)
  var start = coll
  if start == nil:
    start = @[]
  var
    rest = left
    acc = start
    previous: seq[TPattern] = nil
    hits = 0
  while true:
    # could it be that something didn't match but changed rest or acc?
    let step = patt.children[0].match(rest, acc)
    rest = step[1]
    acc = step[2]
    if not step[0]:
      break
    inc(hits)
    # no progress since the previous round: stop instead of looping forever
    if previous == rest:
      break
    previous = rest
  if hits < 1:
    return (false, left, start)
  return (true, rest, acc)
# END TOneOrMore implementation | |
# TEither implementation | |
method match(patt: TEither, left: seq[TPattern],
             coll: seq[TPattern]=nil): tuple[success: bool,
                                             l, c: seq[TPattern]] =
  ## Matches each child independently against the same `left`/`coll` and
  ## picks the best successful alternative.
  ##
  ## The best alternative is the one that leaves the fewest unmatched
  ## patterns; ties go to the earliest child. When no child matches,
  ## `(false, left, coll)` is returned (a nil `coll` is reported as an
  ## empty sequence).
  var collected = coll
  if collected == nil:
    collected = @[]
  result = (false, left, collected)
  var found = false
  for pattern in patt.children:
    let outcome = pattern.match(left, collected)
    # keep the first success, then only strictly smaller leftovers
    if outcome[0] and (not found or len(outcome[1]) < len(result[1])):
      result = outcome
      found = true
# END TEither | |
# TTokens implementation | |
proc current(tokens: TTokens): string =
  ## Returns the first token without consuming it, or "" when no tokens
  ## are left.
  if len(tokens) == 0:
    return ""
  return tokens[0]
proc move(tokens: var TTokens): string =
  ## Removes and returns the first token, or returns "" when no tokens are
  ## left. The order of the remaining tokens is preserved.
  if len(tokens) > 0:
    result = tokens[0]
    # `delete` shifts the remaining tokens down; `del` would instead move
    # the LAST token into slot 0 and scramble the stream.
    tokens.delete(0)
  else:
    result = ""
# END TTokens implementation | |
iterator walk[T](s: seq[T], stride=1, start=0): T =
  ## Yields every `stride`-th element of `s`, beginning at index `start`.
  assert(stride > 0) # a zero or negative stride would never terminate
  var pos = start
  while pos < len(s):
    yield s[pos]
    inc(pos, stride)
proc walk[T](s: seq[T], stride=1, start=0): seq[T] =
  ## Collects the elements yielded by the `walk` iterator into a sequence.
  result = @[]
  for item in walk(s, stride, start):
    result.add(item)
proc parseLong(tokens: var TTokens, options: var seq[TOption]): seq[TPattern] =
  ## long ::= "--" chars [ ( " " | "=" ) chars ] ;
  ##
  ## Consumes one "--name" or "--name=value" token. An option not yet in
  ## `options` is appended to it; a known option is copied. Returns the
  ## option wrapped in a one-element sequence.
  ##
  ## Raises `EDocoptLanguageError` when the name occurs more than once in
  ## `options`, when a value is given to an option that takes none, or when
  ## a required value is missing.
  # TODO: finish
  var (long, eq, value) = move(tokens).partition("=")
  assert(long.startswith("--"))
  var similar: seq[TOption] = @[]
  for known in options:
    if known.long == long:
      similar.add(known)
  var opt: TOption = nil
  # TODO: support DocoptExit error here
  case len(similar)
  of 0:
    # unknown option: register it, taking an argument only with "="
    var argcount = 0
    if eq == "=":
      argcount = 1
    opt = TOption(short: "", long: long, argcount: argcount, value: "")
    options.add(opt)
    # TODO: support DocoptExit error here
  of 1:
    let known = similar[0]
    opt = TOption(short: known.short, long: known.long,
                  argcount: known.argcount, value: known.value)
    if opt.argcount != 0:
      if value == "":
        # the value must come from the next token
        let upcoming = current(tokens)
        if upcoming == "" or upcoming == "--":
          raise newException(EDocoptLanguageError,
                             opt.long & " requires argument")
        value = move(tokens)
    elif value != "":
      raise newException(EDocoptLanguageError,
                         opt.long & " must not have an argument")
    # TODO: support DocoptExit error here
  else: # might be simply specified ambiguously 2+ times?
    var longs: seq[string] = @[]
    for dup in similar:
      longs.add(dup.long)
    raise newException(EDocoptLanguageError,
                       long & " is not a unique prefix: " &
                       join(longs, ", ") & "?")
  result = @[]
  if opt != nil:
    result.add(TPattern(opt))
proc parseShorts(tokens: var TTokens, options: var seq[TOption]): seq[TPattern] =
  ## shorts ::= "-" ( chars )* [ [ " " ] chars ] ;
  ##
  ## Consumes one token of stacked short options ("-abc") and returns one
  ## option per flag character, in order. Flags not yet in `options` are
  ## appended to it; known flags are copied. When a known flag takes an
  ## argument, the rest of the token (or, if empty, the next token) is
  ## consumed as its value.
  ##
  ## Raises `EDocoptLanguageError` when a flag occurs more than once in
  ## `options` or when a required value is missing.
  # TODO: finish
  let token = move(tokens)
  assert(token.startswith("-") and not token.startswith("--"))
  var left = token.lstrip('-')
  result = @[]
  while left != "":
    let short = "-" & $left[0]
    # Advance the loop variable itself. Re-declaring `left` inside the loop
    # would shadow it, the loop condition would never change and the loop
    # would never end.
    left = substr(left, 1)
    var similar: seq[TOption] = @[]
    for known in options:
      if known.short == short:
        similar.add(known)
    var opt: TOption = nil
    if len(similar) > 1:
      raise newException(EDocoptLanguageError,
                         short & " is specified ambiguously " &
                         $len(similar) & " times")
    elif len(similar) < 1:
      opt = TOption(short: short, long: "", argcount: 0, value: "")
      options.add(opt)
      # TODO: if the error is DocoptExit
      # we need to support this
    else: # why is copying necessary here?
      opt = TOption(short: short, long: similar[0].long,
                    argcount: similar[0].argcount, value: similar[0].value)
      var value = ""
      if opt.argcount != 0:
        if left == "":
          if current(tokens) in @["", "--"]:
            raise newException(EDocoptLanguageError,
                               short & " requires argument")
          value = move(tokens)
        else:
          # the remainder of the token is the value, not more flags
          value = left
          left = ""
      # TODO: support the error being DocoptExit here
    # explicit conversion: `result` holds TPattern, `opt` is a TOption
    result.add(TPattern(opt))
# forward declaration for parseAtom | |
proc parseExpr(tokens: var TTokens, options: var seq[TOption]): seq[TPattern] | |
proc parseAtom(tokens: var TTokens, options: var seq[TOption]): seq[TPattern] =
  ## atom ::= "(" expr ")" | "[" expr "]" | "options"
  ##       | long | shorts | argument | command ;
  ##
  ## Dispatches on the current token. Bracketed groups become a `TRequired`
  ## or `TOptional` node; raises `EDocoptLanguageError` when the closing
  ## bracket is missing.
  # TODO: finish
  let token = current(tokens)
  case token
  of "(", "[":
    discard move(tokens)
    var closing = ""
    if token == "(":
      result = @[TPattern(TRequired(children: parseExpr(tokens, options)))]
      closing = ")"
    else:
      result = @[TPattern(TOptional(children: parseExpr(tokens, options)))]
      closing = "]"
    if move(tokens) != closing:
      raise newException(EDocoptLanguageError, "unmatched '" & token & "'")
  of "options":
    discard move(tokens)
    result = @[TPattern(TOptionsShortcut())]
  else:
    if token.startswith("--") and token != "--":
      result = parseLong(tokens, options)
    elif token.startswith("-") and token != "-" and token != "--":
      result = parseShorts(tokens, options)
    elif token.startswith("<") and token.endswith(">") or token.isUpper():
      result = @[TPattern(TArgument(name: move(tokens), value: ""))]
    else:
      result = @[TPattern(TCommand(name: move(tokens), value: ""))]
proc parseSeq(tokens: var TTokens, options: var seq[TOption]): seq[TPattern] =
  ## seq ::= ( atom [ "..." ] )* ;
  ##
  ## Parses atoms until the token stream ends or a "]", ")" or "|" token is
  ## reached. An atom followed by "..." is wrapped in a `TOneOrMore` node.
  # TODO: finish
  result = @[]
  while true:
    let upcoming = current(tokens)
    if upcoming == "" or upcoming == "]" or upcoming == ")" or
       upcoming == "|":
      break
    var atom = parseAtom(tokens, options)
    if current(tokens) == "...":
      atom = @[TPattern(TOneOrMore(children: atom))]
      discard move(tokens)
    result.add(atom)
proc parseExpr(tokens: var TTokens, options: var seq[TOption]): seq[TPattern] =
  ## expr ::= seq ( "|" seq )* ;
  ##
  ## Without a "|" the parsed sequence is returned unchanged. Otherwise each
  ## alternative with more than one element is wrapped in a `TRequired`
  ## node, and all alternatives together are wrapped in one `TEither` node.
  # TODO: finish
  var sequence = parseSeq(tokens, options)
  if current(tokens) != "|":
    return sequence
  var alternatives: seq[TPattern] = @[]
  if len(sequence) > 1:
    alternatives.add(TPattern(TRequired(children: sequence)))
  else:
    alternatives.add(sequence)
  while current(tokens) == "|":
    discard move(tokens)
    sequence = parseSeq(tokens, options)
    if len(sequence) > 1:
      alternatives.add(TPattern(TRequired(children: sequence)))
    else:
      alternatives.add(sequence)
  if len(alternatives) > 1:
    # the either-node must hold ALL alternatives, not just the last sequence
    result = @[TPattern(TEither(children: alternatives))]
  else:
    result = alternatives
proc parsePattern(source: string, options: var seq[TOption]): TPattern =
  ## Tokenizes a formal usage string and parses it into a pattern tree whose
  ## root is a `TRequired` node.
  ##
  ## Raises `EDocoptLanguageError` when tokens are left over after parsing.
  # Pad brackets, pipes and ellipses with a space on BOTH sides so each one
  # becomes its own token even when written without surrounding spaces,
  # e.g. "(a|b)".
  let src = source.replacef(re(r"([\[\]\(\)\|]|\.\.\.)", {}), " $1 ")
  # Collect the tokens instead of splitting on separators: a regex split
  # discards what the separator matched, which would drop every "<...>"
  # argument. A token is either an angle-bracket argument (which may contain
  # spaces) or a run of non-space characters; no empty tokens are produced.
  var tokens: TTokens = findAll(src, re(r"\S*<.*?>|\S+", {reDotAll}))
  let res = parseExpr(tokens, options)
  if current(tokens) != "":
    raise newException(EDocoptLanguageError,
                       "unexpected ending: " & join(tokens, " "))
  result = TRequired(children: res)
proc parseArgv(tokens: var seq[string], options: var seq[TOption],
               optionsFirst=false): seq[TPattern] =
  ## Placeholder: currently ignores all of its parameters and returns an
  ## empty sequence.
  # TODO: finish
  return newSeq[TPattern]()
proc parseSection(name: string, source: string): seq[string] =
  ## Returns every section of `source` introduced by a line containing
  ## `name` (case-insensitive), each with surrounding whitespace stripped.
  ##
  ## A section is that line plus the following lines that start with a space
  ## or a tab. `name` is inserted into the regular expression unescaped.
  let pattern = re(r"^([^\n]*" & name & r"[^\n]*\n?(?:[ \t].*?(?:\n|$))*)",
                   {reIgnoreCase, reMultiLine})
  result = @[]
  for chunk in findAll(source, pattern):
    result.add(strip(chunk))
proc parseDefaults(doc: string): seq[TOption] =
  ## Parses every option description found in the "options:" sections of
  ## `doc` and returns the resulting options in order of appearance.
  result = @[]
  for s in parseSection("options:", doc):
    # FIXME: corner case "bla: options: --foo
    let post = partition(s, ':')[2] # get rid of "options:"
    # docopt.py regex is "\n[ \t]*(-\S+?)" the reason for the
    # positive lookahead here is that Nimrod's regexes don't
    # return elements that were in matching parens
    # TODO: maybe a useful addition to re.nim?
    let pieces = split("\n" & post, re(r"\n[ \t]*(?=-\S+?)", {}))
    # Do not drop the first piece by position: whether `split` emits a
    # leading piece before the first separator is not guaranteed, and the
    # first option could be lost. Any text before the first option never
    # starts with "-", so the filter below removes it.
    for piece in pieces:
      if piece.startswith("-"):
        result.add(parseOption(piece))
proc formalUsage(sec: string): string =
  ## Turns a usage section into one formal pattern string.
  ##
  ## The text after the first ':' is split on whitespace. The first word is
  ## taken to be the program name; it is dropped, and each later occurrence
  ## of it becomes ") | (". The result is wrapped in "( " and " )", e.g.
  ## "usage: prog a\n prog b" becomes "( a ) | ( b )".
  # Keep everything after the FIRST ':' so colons inside the usage patterns
  # themselves (e.g. "<host:port>") are preserved.
  let section = substr(sec, find(sec, ':') + 1) # drop "usage:"
  var pu: seq[string] = @[]
  for word in split(section, Whitespace):
    # skip empty pieces so the program name really is pu[0]
    if word != "":
      pu.add(word)
  var parts: seq[string] = @[]
  for i in countup(1, len(pu) - 1):
    if pu[i] == pu[0]:
      parts.add(") | (")
    else:
      parts.add(pu[i])
  result = "( " & join(parts, " ") & " )"
proc docopt*(doc: string, argv: seq[string]=nil, help=true, version="",
             optionsFirst=false): TTable[string, string] =
  ## Entry point (unfinished): parses the usage pattern of `doc`.
  ##
  ## `argv` holds the command-line arguments; when it is nil they are read
  ## from the process, excluding the program name. `help`, `version` and
  ## `optionsFirst` are not used yet. Currently always returns an empty
  ## table.
  ##
  ## Raises `EDocoptLanguageError` when `doc` has no "usage:" section or
  ## more than one.
  # TODO: finish
  result = initTable[string, string]()
  var args = argv
  if argv == nil:
    args = @[]
    # paramStr(0) is the program name, so real arguments start at index 1
    for i in countup(1, paramCount()):
      args.add(paramStr(i))
  let usageSections = parseSection("usage:", doc)
  if len(usageSections) == 0:
    raise newException(EDocoptLanguageError,
                       "\"usage:\" (case-insensitive) not found.")
  if len(usageSections) > 1:
    raise newException(EDocoptLanguageError,
                       "More than one \"usage:\" (case-insensitive).")
  # TODO: DocoptExit.usage = usage_sections[0]
  var options = parseDefaults(doc)
  let pattern = parsePattern(formalUsage(usageSections[0]), options)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment