Skip to content

Instantly share code, notes, and snippets.

@shiedman
Last active February 1, 2018 06:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save shiedman/5477190 to your computer and use it in GitHub Desktop.
Save shiedman/5477190 to your computer and use it in GitHub Desktop.
import sys,os,string,random
# The encoding name is the script's single command-line argument. When it is
# absent, print a warning plus a usage line and stop with exit status 0.
if len(sys.argv) == 1:
    script_name = os.path.basename(__file__)
    usage_lines = [
        'warn: encoding name missing',
        'python %s <encoding name>' % script_name,
    ]
    print('\n'.join(usage_lines))
    sys.exit(0)

# Codec name used for every encode() call below and as the prefix of the
# generated file names.
encoding = sys.argv[1]
# charset maps each two-byte code of the target encoding (the two bytes read
# as one big-endian integer) to the Unicode code point that encodes to it.
charset = {}

# Scan code points 0x0100..0xFFFC; 0x00-0xFF are skipped on purpose.
for code_point in xrange(0x100, 0xfffd):
    try:
        encoded = unichr(code_point).encode(encoding)
        # Only characters that encode to exactly two bytes go into the table;
        # everything else is ignored.
        if len(encoded) == 2:
            lead, trail = ord(encoded[0]), ord(encoded[1])
            charset[(lead << 8) | trail] = code_point
    except UnicodeEncodeError:
        # This code point has no representation in the target encoding.
        continue
    except LookupError as e:
        # Raised by encode(), e.g. for an unrecognised encoding name:
        # report the message and exit with status 1.
        print(e.message)
        sys.exit(1)
    except:
        # Anything else is unexpected: name the exception type, then let it
        # propagate.
        print('Unexpected error: %s' % (sys.exc_info()[0],))
        raise
# JavaScript header of the generated mapping module. ${encoding} is filled in
# with the encoding name; the text ends inside the still-open `table:{`
# object literal, which the code below completes.
tableTmpl = '''// auto generated mapping table, ${encoding} ==> unicode
var iconv=require('iconv-lite')
var encoding='${encoding}',_encoding=encoding.replace(/[- ]/g,'')
/**
* set aliase name for ${encoding}
* for big5, iconv.encoding['cp950']=_encoding
*/
//iconv.encodings['aliase1']=_encoding
//iconv.encodings['aliase2']=_encoding
iconv.encodings[encoding]=_encoding
iconv.encodings[_encoding]={
type:'table',
table:{
'''

# Write <encoding>.js: the header, then one "0xCODE:0xUNICODE,//char" line per
# table entry in ascending order of the encoded value, then the closing braces.
with open(encoding + '.js', 'wb') as table_file:
    table_file.write(string.Template(tableTmpl).substitute(encoding=encoding))
    table_file.writelines(
        ' 0x%04X:0x%04X,//%s\n' % (
            code,
            charset[code],
            # The character itself, UTF-8 encoded, as a trailing JS comment.
            unichr(charset[code]).encode('utf8'),
        )
        for code in sorted(charset)
    )
    table_file.write(' }\n}')
print('generated %s.js' % encoding)
# JavaScript self-test for the generated table. Placeholders:
#   ${encoding}      - encoding name (also the name of the module to require)
#   ${testStr}       - sample characters as UTF-8 text
#   ${testStringHex} - the same characters as hex of their encoded bytes
# The template appends "abc" / "616263" so ASCII pass-through is checked too.
testTmpl = '''
var iconv=require('iconv-lite'),
assert=require('assert');
require('./${encoding}.js');
var testStr='${testStr}abc',
testBuf=new Buffer('${testStringHex}616263','hex');
var resultBuf=iconv.encode(testStr,'${encoding}'),
resultStr=iconv.decode(testBuf,'${encoding}');
assert.strictEqual(testStr,resultStr);
assert.strictEqual(testBuf.toString('hex'),resultBuf.toString('hex'));
console.log('test passed');
'''

# Write <encoding>Test.js using up to five randomly chosen table entries.
with open(encoding + 'Test.js', 'wb') as f:
    # random.sample() raises ValueError when asked for more items than the
    # population holds, so clamp the sample size for very small (or empty)
    # tables instead of crashing after the table file was already written.
    sample_size = min(5, len(charset))
    chars = random.sample(list(charset), sample_size)
    testStr = ''.join([unichr(charset[c]) for c in chars]).encode('utf8')
    # Each code stands for exactly two bytes, so it must always be rendered
    # as four hex digits; without zero padding a code below 0x1000 would
    # produce an odd-length, misaligned hex string for the Buffer.
    testStringHex = ''.join(['%04X' % c for c in chars])
    t = string.Template(testTmpl)
    f.write(t.substitute(
        encoding=encoding, testStr=testStr, testStringHex=testStringHex))
print('generated %sTest.js' % encoding)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment