public
Created

  • Download Gist
utf-8-binencode.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
import operator
import functools
import itertools
import os
import json
import struct
#import math
 
import bitarray
 
def encode(minbytes):
    """Pack arbitrary bytes into a string of 7-bit (ASCII-range) characters.

    The input is read as one big-endian bit string and cut into 7-bit
    groups starting from the *last* bit.  Every group becomes one character
    whose code point is the group's value (0..127), so the result is always
    valid UTF-8.  If the bit count is not a multiple of 7, the first
    character carries the leftover high bits, zero-padded on the left.

    Args:
        minbytes: bytes-like object to encode.

    Returns:
        A tuple ``(bit_length, text)`` where ``bit_length`` is the number of
        bits in the input (``8 * len(minbytes)``) and ``text`` is the encoded
        string of ``ceil(bit_length / 7)`` characters.

    Raises:
        TypeError: if ``minbytes`` does not support the buffer protocol.
    """
    # memoryview() rejects str/int, so only genuine bytes-like input is
    # accepted; bytes() then gives an immutable, sliceable copy.
    data = bytes(memoryview(minbytes))
    bit_length = 8 * len(data)

    # 7 bytes == 56 bits == exactly 8 groups of 7 bits, so walking the input
    # in 7-byte chunks from the tail keeps every chunk aligned on a group
    # boundary.  Only the front-most chunk may be shorter.  This is a single
    # linear pass instead of shifting the whole bit string per inserted bit.
    parts = []
    for end in range(len(data), 0, -7):
        chunk = data[max(end - 7, 0):end]
        value = int.from_bytes(chunk, "big")
        group_count = (8 * len(chunk) + 6) // 7  # ceil(bits / 7)
        parts.append(bytes(
            (value >> shift) & 0x7F
            for shift in range(7 * (group_count - 1), -1, -7)
        ))

    # Chunks were collected tail-first; restore most-significant-first order.
    parts.reverse()
    mstring = b"".join(parts).decode("utf-8")

    return (bit_length, mstring)
 
#tochars1 = lambda mbinarr: '' if len(mbinarr) == 0 else bin(int.from_bytes(mbinarr.tobytes(), 'big'))[2:].zfill(len(mbinarr))
#tochars2 = lambda mbinarr: ''.join(list(map(lambda x: '1' if x else '0', mbinarr.tolist()))) # .zfill(mbinarr.length())
#tochars3 = lambda mbinarr: mbinarr.to01()
 
def decode(pair):
    """Recover the original bytes from a ``(bit_length, text)`` pair.

    This is the inverse of the 7-bits-per-character packing: ``text`` is
    UTF-8 encoded, the low 7 bits of every resulting byte are concatenated
    into one bit string, and the last ``bit_length`` bits of it are returned
    as bytes.

    The function is lenient about malformed input: the high bit of each
    byte is ignored, a ``bit_length`` larger than the available bits keeps
    all of them, and if the kept bit count is not a multiple of 8 the
    result is zero-padded on the right.

    Args:
        pair: ``(rawlen, mstring)`` - the payload's bit length and its
            encoded text.

    Returns:
        The decoded payload as ``bytes``.

    Raises:
        IndexError: if ``rawlen`` is negative.
    """
    (rawlen, mstring) = pair
    if rawlen < 0:
        raise IndexError("bit length must not be negative")

    septets = mstring.encode()
    total_bits = 7 * len(septets)

    # 8 characters carry 56 bits == exactly 7 bytes, so walking the text in
    # groups of 8 from the tail keeps each group byte-aligned.  Only the
    # front-most group may be shorter; it is left-padded with zero bits.
    # One linear pass replaces the per-bit pops of the whole bit string.
    parts = []
    for end in range(len(septets), 0, -8):
        group = septets[max(end - 8, 0):end]
        value = 0
        for septet in group:
            value = (value << 7) | (septet & 0x7F)  # drop the marker bit
        parts.append(value.to_bytes((7 * len(group) + 7) // 8, "big"))
    parts.reverse()
    payload = int.from_bytes(b"".join(parts), "big")

    # Keep only the trailing `rawlen` bits (discarding the leading padding),
    # then left-align them on a byte boundary.
    keep = min(rawlen, total_bits)
    payload &= (1 << keep) - 1
    right_pad = -keep % 8

    return (payload << right_pad).to_bytes((keep + right_pad) // 8, "big")
#data = (0x12345678abcdef).to_bytes(7,'big')
#data = (0xffffffffffffff).to_bytes(7,'big')
#data = functools.reduce(operator.add, (map(lambda x: x.to_bytes(4,'big'), [0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, 0xffffffff])))
 
def test():
    """Round-trip self-test for encode/decode.

    Checks the empty payload first, then 100 random payloads whose length is
    drawn from 0..511 bytes, printing progress after every tenth of the run.
    Raises Exception as soon as a payload does not survive the round trip.
    """
    def iteration(data):
        # One round trip: the decoded result must equal the input exactly.
        restored = decode(encode(data))
        if restored == data:
            return
        raise Exception("error!\n{}\n{}".format(data, restored))

    iteration(b"")

    n = 100
    tenth = n // 10
    for round_no in range(1, n + 1):
        # Two random bytes give 0..65535; dropping 7 bits leaves 0..511.
        size = int.from_bytes(os.urandom(2), 'big') >> 7
        iteration(os.urandom(size))
        if round_no % tenth == 0:
            print(round_no // tenth * 10, '% complete')
 
# Run the round-trip self-test only when executed as a script, not on import.
if __name__ == "__main__":
    test()

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.