Skip to content

Instantly share code, notes, and snippets.

@Parashurama
Last active June 1, 2016 19:58
Add support for Unicode and ASCII escape codes in triple-quoted strings
proc parse_hex*(num: var int; ch: char) =
  ## Appends one hexadecimal digit to `num`: shifts `num` left by 4 bits and
  ## ORs in the value of `ch`, which may be ``0-9``, ``a-f`` or ``A-F``.
  ##
  ## Raises ``ValueError`` when `ch` is not a hexadecimal digit; `num` is left
  ## unchanged in that case.
  case ch
  of '0'..'9': num = (num shl 4) or (ch.int - '0'.int)
  of 'a'..'f': num = (num shl 4) or (ch.int - 'a'.int + 10)
  of 'A'..'F': num = (num shl 4) or (ch.int - 'A'.int + 10)
  else: raise newException(ValueError, "invalid unicode hexcode")
template ones(n: untyped): untyped =
  ## Bit mask with the lowest `n` bits set, e.g. ``ones(6) == 0b11_1111``.
  ((1 shl n) - 1)
proc runeLenAt(s: string, i: Natural): int =
  ## Returns the number of bytes the rune starting at ``s[i]`` takes,
  ## judged only from the bit pattern of the lead byte ``s[i]``.
  ##
  ## Bytes that do not match any lead-byte pattern (for example UTF-8
  ## continuation bytes) are reported as length 1.
  let lead = ord(s[i])
  if lead <= 127: result = 1                      # 0xxxxxxx: ASCII
  elif lead shr 5 == 0b110: result = 2            # 110xxxxx
  elif lead shr 4 == 0b1110: result = 3           # 1110xxxx
  elif lead shr 3 == 0b11110: result = 4          # 11110xxx
  elif lead shr 2 == 0b111110: result = 5         # 111110xx
  elif lead shr 1 == 0b1111110: result = 6        # 1111110x
  else: result = 1
proc usc*(s: string): string {.compiletime.} =
  ## Expands backslash escape codes found in `s` (typically a raw or
  ## triple-quoted literal) and returns the resulting string.
  ##
  ## Recognised escapes:
  ## * ``\\``                  -> a single backslash
  ## * ``\n``, ``\l``, ``\L``  -> line feed
  ## * ``\0`` + octal digits   -> the byte with that octal value
  ## * ``\xHH``                -> code point ``HH``, emitted as UTF-8
  ## * ``\uHHHH``              -> code point ``HHHH``, emitted as UTF-8
  ##
  ## Any other escape (the backslash and the character after it) is dropped,
  ## and so is a lone backslash at the very end of `s`.
  ##
  ## Raises ``ValueError`` when a ``\x``/``\u`` escape is truncated or holds
  ## a non-hexadecimal digit, or when an octal escape exceeds ``0xFF``.
  result = ""
  var idx = 0
  while idx < len(s):
    var ch = s[idx]
    if ch == '\\':
      # Escape code: look at the character following the backslash.
      inc(idx)
      if idx >= len(s):
        # Lone trailing backslash: there is nothing to escape, drop it
        # instead of indexing past the end of the string.
        break
      ch = s[idx]
      case ch
      of '\\': # escaped backslash
        result.add('\\')
      of 'n', 'l', 'L': # newline literal
        result.add('\L')
      of '0': # octal escape
        var num = 0
        # The bounds check lets an octal escape sit at the end of the string.
        while idx + 1 < len(s) and s[idx + 1] in {'0'..'7'}:
          num = (num shl 3) or (s[idx + 1].int - '0'.int)
          if num > 0xFF:
            raise newException(ValueError, "octal escape out of byte range")
          inc(idx)
        result.add(num.char)
      of 'u', 'x': # unicode hex escape
        let n_chars = if ch == 'u': 4 else: 2
        if idx + n_chars >= len(s):
          raise newException(ValueError, "truncated hex escape code")
        var num = 0
        for i in idx + 1 .. idx + n_chars:
          parse_hex(num, s[i])
        # Inlined UTF-8 encoding to avoid unicode and strutils dependencies.
        if num <=% 127:
          result.add(num.char)
        elif num <=% 0x07FF:
          result.add(((num shr 6) or 0b110_00000).char)
          result.add(((num and ones(6)) or 0b10_0000_00).char)
        elif num <=% 0xFFFF:
          result.add(((num shr 12) or 0b1110_0000).char)
          result.add((((num shr 6) and ones(6)) or 0b10_0000_00).char)
          result.add(((num and ones(6)) or 0b10_0000_00).char)
        else:
          # Not reachable with at most four hex digits; kept as a defensive
          # fallback that emits the UTF-8 encoding of U+FFFF.
          result.add("\xef\xbf\xbf")
        inc(idx, n_chars)
      else: # skip unknown escape code.
        discard
      inc(idx)
      continue
    else:
      # Copy the whole UTF-8 byte sequence to the output; clamp to the end of
      # the string so a truncated sequence cannot read out of bounds.
      let last_char = min(idx + runeLenAt(s, idx), len(s))
      while idx < last_char:
        result.add(s[idx])
        inc(idx)
when isMainModule:
  # Demo: print the literal with its escape codes expanded by `usc`,
  # then the same raw literal untouched for comparison.
  echo usc"""mn\lue \075 \x17 \u25cf\uFFFF"""
  echo """mn\lue \075 \x17 \u25cf\uFFFF"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment