Skip to content

Instantly share code, notes, and snippets.

@tsudoko
Last active April 23, 2019 00:08
Show Gist options
  • Save tsudoko/0b5d416a7f4efef74d9a8a69663aca02 to your computer and use it in GitHub Desktop.
Save tsudoko/0b5d416a7f4efef74d9a8a69663aca02 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import ast
import sys
# Codec names that encode() and decode() iterate over, in this order.
encodings = [
    "ascii",
    "big5", "big5hkscs",
    "cp037", "cp424", "cp437", "cp500",
    "cp720", "cp737", "cp775",
    "cp850", "cp852", "cp855", "cp856", "cp857", "cp858",
    "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp869",
    "cp874", "cp875",
    "cp932", "cp949", "cp950",
    "cp1006", "cp1026", "cp1140",
    "cp1250", "cp1251", "cp1252", "cp1253", "cp1254",
    "cp1255", "cp1256", "cp1257", "cp1258",
    "euc_jp", "euc_jis_2004", "euc_jisx0213", "euc_kr",
    "gb2312", "gbk", "gb18030", "hz",
    "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004",
    "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr",
    "latin_1",
    "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6",
    "iso8859_7", "iso8859_8", "iso8859_9", "iso8859_10", "iso8859_13",
    "iso8859_14", "iso8859_15", "iso8859_16",
    "johab",
    "koi8_r", "koi8_u",
    "mac_cyrillic", "mac_greek", "mac_iceland", "mac_latin2",
    "mac_roman", "mac_turkish",
    "ptcp154",
    "shift_jis", "shift_jis_2004", "shift_jisx0213",
    "utf_32", "utf_32_be", "utf_32_le",
    "utf_16", "utf_16_be", "utf_16_le",
    "utf_7", "utf_8", "utf_8_sig",
]
def encode(string, names=None):
    """Encode *string* with every codec that is able to represent it.

    Args:
        string: The text to encode.
        names: Optional iterable of codec names to try. When omitted, the
            module-level ``encodings`` list is used.

    Returns:
        A dict mapping each codec name (with any space replaced by "␣")
        to the bytes that codec produced. Codecs that cannot encode
        *string*, or that this interpreter does not provide, are left out.
    """
    if names is None:
        names = encodings
    results = {}
    for enc in names:
        try:
            encoded = string.encode(enc)
        except (UnicodeEncodeError, LookupError):
            # UnicodeEncodeError: the codec cannot represent the text.
            # LookupError: the codec is unknown to this interpreter; skip it
            # rather than abort the whole scan.
            continue
        # Keys are later joined with a plain space by the script's --double
        # mode, so a space inside a codec name is made visible as "␣".
        results[enc.replace(" ", "␣")] = encoded
    return results
def decode(bytestr, names=None):
    """Decode *bytestr* with every codec that accepts it.

    Args:
        bytestr: The bytes to decode.
        names: Optional iterable of codec names to try. When omitted, the
            module-level ``encodings`` list is used.

    Returns:
        A dict mapping each codec name (with any space replaced by "␣")
        to the text that codec produced. Codecs that reject *bytestr*, or
        that this interpreter does not provide, are left out.
    """
    if names is None:
        names = encodings
    results = {}
    for enc in names:
        try:
            decoded = bytestr.decode(enc)
        except (UnicodeDecodeError, LookupError):
            # UnicodeDecodeError: the bytes are not valid in this codec.
            # LookupError: the codec is unknown to this interpreter; skip it
            # rather than abort the whole scan.
            continue
        # Keys are later joined with a plain space by the script's --double
        # mode, so a space inside a codec name is made visible as "␣".
        results[enc.replace(" ", "␣")] = decoded
    return results
def cleanprint(dic):
    """Write each entry of *dic* to stdout as one ``key value`` line."""
    for entry in dic.items():
        # entry is a (key, value) pair; print() separates them with a space.
        print(*entry)
if __name__ == "__main__":
    # Command-line driver.
    #   -f TEXT     encode TEXT with every codec
    #   -F LITERAL  evaluate LITERAL as a Python bytes literal and decode it
    #               with every codec
    #   -d          run the opposite operation on each first-pass result and
    #               key the output as "<first codec> <second codec>"
    #   -t VALUE    keep only results equal to VALUE
    #   -T LITERAL  keep only results equal to the evaluated Python literal
    p = argparse.ArgumentParser()
    p.add_argument("-d", "--double", action="store_true")
    p.add_argument("-f", "--from")
    p.add_argument("-F", "--from-eval")
    p.add_argument("-t", "--to")
    p.add_argument("-T", "--to-eval")
    args = p.parse_args()

    def _literal(option, text):
        # Parse a Python literal given on the command line; a malformed
        # value becomes a normal argparse usage error instead of a traceback.
        try:
            return ast.literal_eval(text)
        except (ValueError, SyntaxError) as err:
            p.error("{}: not a valid Python literal: {}".format(option, err))

    # "from" is a Python keyword, so the attribute must be read via getattr.
    from_text = getattr(args, "from")

    # Compare against None rather than truthiness so that an explicitly
    # supplied empty string still counts as "provided".
    if from_text is None and args.from_eval is None:
        p.error("either -f or -F must be provided")
    if from_text is not None and args.from_eval is not None:
        p.error("can't use both -f and -F")
    if args.to is not None and args.to_eval is not None:
        p.error("can't use both -t and -T")

    if from_text is not None:
        encs = encode(from_text)
        if args.double:
            # Second pass: decode every encoded form with every codec.
            first_pass = encs
            encs = {}
            for k1, f in first_pass.items():
                for k2, v in decode(f).items():
                    encs[k1 + " " + k2] = v
    else:
        raw = _literal("-F", args.from_eval)
        if not isinstance(raw, bytes):
            p.error("-F must evaluate to a bytes literal")
        encs = decode(raw)
        if args.double:
            # Second pass: re-encode every decoded form with every codec.
            first_pass = encs
            encs = {}
            for k1, f in first_pass.items():
                for k2, v in encode(f).items():
                    encs[k1 + " " + k2] = v

    if args.to is not None or args.to_eval is not None:
        # Evaluate the -T literal once, not once per candidate result.
        if args.to is not None:
            target = args.to
        else:
            target = _literal("-T", args.to_eval)
        encs = {k: v for k, v in encs.items() if v == target}

    cleanprint(encs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment