Created
August 13, 2019 07:18
-
-
Save se4u/5693743e058f4df769e5306dc10a2923 to your computer and use it in GitHub Desktop.
Example of Python metaprogramming: a custom source codec that modifies the token sequence produced by the built-in tokenizer before Python parses the code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## --- Save this file as pymeta.py ---- ## | |
# inspired by pyxl and github.com/SyrusAkbary/interpy | |
# >> import pymeta | |
# >> print(codecs.decode("1\n2", encoding='pymeta')) | |
# 1 | |
# import time; print(time.time()); | |
# 2 | |
## ---------------------- ## | |
from six import StringIO | |
import codecs, tokenize, encodings | |
from tokenize import TokenInfo | |
# Codec record for plain UTF-8; the pymeta codec reuses its encoder,
# writer and decode function.
UTF8 = encodings.search_function('utf8')

# Unbuffered binary log file, opened at import time in the current
# directory; transform() writes the token list it produces to it.
LOGFILE = open('tmp.log', 'wb', 0)

# Source text of the statement whose tokens get spliced into decoded code.
sio = StringIO("import time; print(time.time());")

# tokenize.tokenize() wants a readline callable that returns bytes, hence
# the per-line encode.
_snippet_tokens = tokenize.tokenize(lambda: sio.readline().encode('utf8'))

# Keep everything except the first token and the last two (the leading
# ENCODING token and, presumably, the trailing NEWLINE and ENDMARKER).
toinject = list(_snippet_tokens)[1:-2]
def inject(a):
    """Yield the tokens of *a* with the ``toinject`` snippet spliced in.

    Every token of *a* is re-emitted. After each NEWLINE token that carries
    real text (the synthetic empty NEWLINE the tokenizer adds at end of
    input is skipped), the tokens in ``toinject`` are emitted on the
    following row, starting at the columns they had in the snippet. Tokens
    of *a* that sit on that same row are shifted right by the snippet's
    width so they do not overlap it.

    Args:
        a: iterable of ``tokenize.TokenInfo`` tuples, e.g. the output of
            ``tokenize.tokenize``.

    Yields:
        ``tokenize.TokenInfo`` tuples suitable for ``tokenize.untokenize``.

    NOTE(review): the snippet is placed ahead of the next row's own tokens
    using the snippet's original columns, so a following row that is
    indented is not handled specially - confirm whether that matters for
    the intended inputs.
    """
    injected_row = None  # row that currently holds an injected snippet
    coff = 0             # width of the snippet on that row
    for e in a:
        (start_row, start_col), (end_row, end_col) = e.start, e.end
        # Only positions on the injected row are displaced. Rows before or
        # after it keep their original coordinates, so blank lines,
        # comments and multi-line tokens are left where they were.
        if start_row == injected_row:
            start_col += coff
        if end_row == injected_row:
            end_col += coff
        yield TokenInfo(e.type, e.string,
                        (start_row, start_col), (end_row, end_col), e.line)

        if e.type == tokenize.NEWLINE and e.string != '':
            injected_row = end_row + 1
            coff = 0
            for ee in toinject:
                # The shift needed is the end column of the last injected
                # token, not the running sum of all end columns.
                coff = ee.end[1]
                yield TokenInfo(ee.type, ee.string,
                                (injected_row, ee.start[1]),
                                (injected_row, ee.end[1]), ee.line)
def transform(stream):
    """Tokenize *stream*, splice in the injected tokens, rebuild the source.

    Args:
        stream: object whose ``readline()`` returns ``str``; each line is
            encoded to UTF-8 before being handed to ``tokenize.tokenize``.

    Returns:
        Whatever ``tokenize.untokenize`` produces for the token list
        yielded by ``inject``.

    Side effect: the string form of every resulting token is written to
    ``LOGFILE``, one token per line, followed by a newline.
    """
    def read_encoded_line():
        # tokenize.tokenize() expects a readline callable returning bytes.
        return stream.readline().encode('utf8')

    tokens = list(inject(tokenize.tokenize(read_encoded_line)))
    dump = '\n'.join(map(str, tokens)).encode('utf8') + b'\n'
    LOGFILE.write(dump)
    return tokenize.untokenize(iter(tokens))
def decode(input, errors='strict'):
    """Stateless decode function of the ``pymeta`` codec.

    The input is turned into text, run through ``transform`` (which
    injects the extra tokens), and the transformed bytes are decoded with
    the UTF-8 codec.

    Args:
        input: source to decode. ``str`` is used as is; ``memoryview``,
            ``bytes`` and ``bytearray`` are first decoded as UTF-8.
        errors: codec error-handling scheme, applied both to the initial
            UTF-8 decode and to the final ``UTF8.decode`` call.

    Returns:
        The result of ``UTF8.decode`` on the transformed source.
    """
    # The interpreter may hand the codec a memoryview, while
    # codecs.decode(b"...", "pymeta") passes bytes straight through;
    # StringIO only accepts text, so normalise every binary form first.
    if isinstance(input, memoryview):
        input = input.tobytes()
    if isinstance(input, (bytes, bytearray)):
        input = bytes(input).decode('utf-8', errors)
    return UTF8.decode(transform(StringIO(input)), errors)
class IncrementalDecoder(encodings.utf_8.IncrementalDecoder):
    """Incremental decoder that transforms the source once it is complete.

    Token injection needs the whole source, so chunks are only accumulated
    in ``self.buffer`` until the final call; that call transforms the
    buffered source and returns the decoded result in one piece.
    """

    def decode(self, input, final=False):
        """Buffer *input*; on the final call return the transformed text.

        Args:
            input: next chunk of encoded (bytes) source.
            final: True when *input* is the last chunk.

        Returns:
            ``''`` for every non-final call, and the fully transformed,
            decoded source for the final call.
        """
        self.buffer += input
        if not final:
            # An incremental decoder must always return text, never None.
            return ''
        buff = self.buffer
        # The inherited buffer holds bytes; resetting it to a str would
        # make the base-class decode below fail on `buffer + input`.
        self.buffer = b''
        # transform() reads text lines, so decode the raw bytes first.
        text = buff.decode('utf-8', self.errors)
        return super(IncrementalDecoder, self).decode(
            transform(StringIO(text)), final=True)
class StreamReader(encodings.utf_8.StreamReader):
    """UTF-8 stream reader whose underlying stream is transformed up front.

    On construction the wrapped stream is read completely, passed through
    ``transform`` and replaced by an in-memory stream holding the
    transformed source; all later reads go through the inherited UTF-8
    reader.
    """

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as ``codecs.StreamReader``."""
        import io  # local import: only this block needs BytesIO

        codecs.StreamReader.__init__(self, *args, **kwargs)
        raw = self.stream.read()
        # The wrapped stream normally yields bytes, but transform() reads
        # text lines, so decode before tokenizing.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', self.errors)
        # untokenize() returns bytes when the token stream starts with an
        # ENCODING token, and the inherited UTF-8 reader expects a bytes
        # stream, so wrap the result in BytesIO rather than StringIO.
        self.stream = io.BytesIO(transform(StringIO(raw)))
# Codec record for 'pymeta': encoding and stream writing are delegated to
# the UTF-8 codec, while every decoding path goes through the
# token-injecting decode function and decoder classes of this module.
_pymeta_codec = codecs.CodecInfo(
    name='pymeta',
    encode=UTF8.encode,
    decode=decode,
    incrementalencoder=UTF8.incrementalencoder,
    incrementaldecoder=IncrementalDecoder,
    streamreader=StreamReader,
    streamwriter=UTF8.streamwriter,
)

# Lookup table used as the codec search function: dict.get returns the
# record for 'pymeta' and None for any other encoding name.
D = {'pymeta': _pymeta_codec}
codecs.register(D.get)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment