Last active
August 29, 2015 14:24
-
-
Save apense/a2e7fbddfbd0f3561b82 to your computer and use it in GitHub Desktop.
Attempt at Punycode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import strutils | |
import unicode | |
const
  # Bootstring parameter values used by this module's Punycode
  # implementation.
  Base = 36          # number of distinct digit values (a-z, 0-9)
  TMin = 1           # lower clamp for the per-digit threshold
  TMax = 26          # upper clamp for the per-digit threshold
  Skew = 38          # used by `adapt` when computing the new bias
  Damp = 700         # divisor applied to the very first delta in `adapt`
  InitialBias = 72   # starting bias for both encoding and decoding
  InitialN = 0x80    # first non-basic code point (128)
  Delimiter = '-'    # separates the basic prefix from the encoded deltas
type
  # Derives from `ValueError` (a `CatchableError`) instead of the bare
  # `Exception` root, so ordinary `except ValueError` / `except
  # CatchableError` handlers see it; `except PunyError` keeps working.
  PunyError* = object of ValueError ## Raised on invalid Punycode input.
proc decodeDigit(x: char): int {.raises: [PunyError].} =
  ## Map a Punycode digit character to its numeric value:
  ## letters (either case) give 0..25, decimal digits give 26..35.
  ## Any other character raises `PunyError`.
  case x
  of 'a'..'z':
    result = ord(x) - ord('a')
  of 'A'..'Z':
    result = ord(x) - ord('A')
  of '0'..'9':
    result = ord(x) - ord('0') + 26
  else:
    raise newException(PunyError, "Bad input")
proc encodeDigit(digit: int): Rune {.raises: [PunyError].} =
  ## Map a numeric digit value to its Punycode character:
  ## 0..25 become 'a'..'z', 26..35 become '0'..'9'.
  ## Values outside 0..35 raise `PunyError`.
  case digit
  of 0..25:
    result = Rune(ord('a') + digit)
  of 26..35:
    result = Rune(ord('0') + digit - 26)
  else:
    raise newException(PunyError, "internal error in punycode encoding")
proc isBasic(c: char): bool =
  ## True when `c` is a basic (7-bit ASCII) character, i.e. below 0x80.
  c <= '\x7F'
proc isBasic(r: Rune): bool =
  ## True when the code point `r` is below 0x80.
  int(r) <= 0x7F
proc adapt(delta, numPoints: int, first: bool): int =
  ## Compute the new bias from the last `delta`, the number of code
  ## points handled so far (`numPoints`), and whether this is the first
  ## adaptation (`first`), in which case the delta is divided by `Damp`
  ## instead of 2.
  const threshold = ((Base - TMin) * TMax) div 2
  var scaled = delta div (if first: Damp else: 2)
  scaled += scaled div numPoints
  # `result` starts at 0 and accumulates one `Base` per reduction step.
  while scaled > threshold:
    scaled = scaled div (Base - TMin)
    result += Base
  result += ((Base - TMin + 1) * scaled) div (scaled + Skew)
proc encode*(prefix = "", s: string): string {.raises: [PunyError].} =
  ## Punycode-encode `s`, which may contain non-ASCII code points, and
  ## return the encoding with `prefix` prepended.
  ##
  ## Raises `PunyError` if the running delta becomes negative, or if
  ## `encodeDigit` rejects a digit value.
  let codePoints = toRunes(s)
  result = prefix

  # Basic code points are copied through unchanged, in input order.
  var basicCount = 0
  for cp in codePoints:
    if cp.isBasic:
      result.add($cp)
      inc basicCount

  # The delimiter is emitted only when at least one basic code point exists.
  if basicCount > 0:
    result.add(Delimiter)

  var
    delta = 0
    n = InitialN
    bias = InitialBias
    handled = basicCount
    pending = codePoints.len - basicCount

  while pending != 0:
    # Find the smallest code point that is still >= n.
    var next: int = high(int32)
    for cp in codePoints:
      let v = int(cp)
      if v >= n and v < next:
        next = v

    delta += (next - n) * (handled + 1)
    if delta < 0:
      raise newException(PunyError, "invalid label " & s)
    n = next

    for cp in codePoints:
      let v = int(cp)
      if v < n:
        inc delta
        if delta < 0:
          raise newException(PunyError, "invalid label " & s)
      elif v == n:
        # Emit `delta` as a variable-length integer, least significant
        # digit first, with the threshold clamped to TMin..TMax.
        var q = delta
        var k = Base
        while true:
          let t = max(TMin, min(TMax, k - bias))
          if q < t:
            break
          result.add($encodeDigit(t + (q - t) mod (Base - t)))
          q = (q - t) div (Base - t)
          k += Base
        result.add($encodeDigit(q))
        bias = adapt(delta, handled + 1, handled == basicCount)
        delta = 0
        inc handled
        dec pending

    inc delta
    inc n
proc decode*(encoded: string): string {.raises: [PunyError].} =
  ## Decode a Punycode-encoded string.
  ##
  ## Everything before the last `Delimiter` (when it is not at index 0)
  ## is copied through as basic code points. The rest is read as a
  ## series of variable-length integers; each one yields a code point
  ## and the position at which it is inserted into the output.
  ##
  ## Raises `PunyError` when the basic part contains a non-basic char,
  ## when a digit character is invalid, when the input ends in the
  ## middle of an integer, or when a decoded value exceeds `high(int32)`.
  var
    n = InitialN
    i = 0
    bias = InitialBias
    # The output is kept as runes so that the insertion index `i`, which
    # counts code points, addresses the right slot. Inserting into a
    # UTF-8 string at byte offset `i` would split earlier multi-byte
    # runes (e.g. "tdaa" must decode to "üü").
    output: seq[Rune] = @[]
  var pos = rfind(encoded, Delimiter)

  if pos > 0:
    # Delimiter found: copy the basic code points that precede it.
    for j in 0 ..< pos:
      let c = encoded[j]
      if not c.isBasic:
        raise newException(PunyError, "Encoded contains a non-basic char")
      output.add(Rune(ord(c)))
    inc pos # skip the delimiter itself
  else:
    pos = 0 # no basic part; start decoding at the first character

  while pos < len(encoded):
    let oldi = i
    var w = 1
    var k = Base
    # Read one variable-length integer into `i`.
    while true:
      if pos == len(encoded):
        raise newException(PunyError, "Bad input: " & encoded)
      let c = encoded[pos]
      inc pos
      let digit = decodeDigit(c)
      if digit > (high(int32) - i) div w:
        raise newException(PunyError, "Too large a value: " & $digit)
      i += digit * w
      let t =
        if k <= bias: TMin
        elif k >= bias + TMax: TMax
        else: k - bias
      if digit < t:
        break
      w *= Base - t
      k += Base

    # `i` now encodes both how far to advance `n` and where to insert.
    let count = output.len + 1
    bias = adapt(i - oldi, count, oldi == 0)
    if i div count > high(int32) - n:
      raise newException(PunyError, "Value too large")
    n += i div count
    i = i mod count
    output.insert(Rune(n), i)
    inc i

  result = $output
when isMainModule:
  # Self-checks run only when this file is compiled as the main module.
  # `encode` takes `prefix` as its first positional parameter, so a
  # single-argument call leaves the required `s` unfilled; the empty
  # prefix is therefore passed explicitly.
  # `doAssert` keeps the checks active even with assertions disabled.
  doAssert decode(encode("", "bücher")) == "bücher"
  doAssert decode(encode("", "münchen")) == "münchen"
  doAssert encode("xn--", "münchen") == "xn--mnchen-3ya"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment