Skip to content

Instantly share code, notes, and snippets.

@stestagg
Created June 29, 2020 21:55
Show Gist options
  • Save stestagg/0c0bee3556b878c2f322f6530fcc24a5 to your computer and use it in GitHub Desktop.
Save stestagg/0c0bee3556b878c2f322f6530fcc24a5 to your computer and use it in GitHub Desktop.
import string;
import keyword
import json
import ast
# Augmented-assignment operators, written as quoted Python string literals
# separated by " | ".  get_tokens() splits on " | ", strips each piece and
# evaluates it with ast.literal_eval, so the surrounding newlines are harmless.
ASSIGN_OPS = (
    "\n"
    "'+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | "
    "'<<=' | '>>=' | '**=' | '//='"
    "\n"
)

# Single-character operators and brackets.  get_tokens() extends its list with
# this string directly, so every character becomes a separate token (the
# repeated "/" collapses once main() turns the tokens into a set).
OPS = "*|^&+-@/%/()[]~"

# Operators made of more than one character; each entry is one token.
MULTI_OPS = [
    "<<",
    ">>",
    "//",
    "**",
    "...",
]
# JSON-encoded array of dotted module names; get_tokens() decodes it with
# json.loads().  (The names look like the Python standard-library module
# index -- confirm the original source before regenerating.)
#
# The array is wrapped only *between* elements.  JSON permits whitespace
# there, whereas a raw line break inside a quoted entry (as previously
# happened in the middle of "email") makes json.loads() fail with an
# "Invalid control character" error.
MODS = """
[
"__future__", "__main__", "_dummy_thread", "_thread",
"abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
"asyncore", "atexit", "audioop",
"base64", "bdb", "binascii", "binhex", "bisect", "builtins", "bz2",
"calendar", "cgi", "cgitb", "chunk", "cmath", "cmd", "code", "codecs",
"codeop", "collections", "collections.abc", "colorsys", "compileall",
"concurrent", "concurrent.futures", "configparser", "contextlib",
"contextvars", "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes",
"curses", "curses.ascii", "curses.panel", "curses.textpad",
"dataclasses", "datetime", "dbm", "dbm.dumb", "dbm.gnu", "dbm.ndbm",
"decimal", "difflib", "dis",
"distutils", "distutils.archive_util", "distutils.bcppcompiler",
"distutils.ccompiler", "distutils.cmd", "distutils.command",
"distutils.command.bdist", "distutils.command.bdist_dumb",
"distutils.command.bdist_msi", "distutils.command.bdist_packager",
"distutils.command.bdist_rpm", "distutils.command.bdist_wininst",
"distutils.command.build", "distutils.command.build_clib",
"distutils.command.build_ext", "distutils.command.build_py",
"distutils.command.build_scripts", "distutils.command.check",
"distutils.command.clean", "distutils.command.config",
"distutils.command.install", "distutils.command.install_data",
"distutils.command.install_headers", "distutils.command.install_lib",
"distutils.command.install_scripts", "distutils.command.register",
"distutils.command.sdist", "distutils.core", "distutils.cygwinccompiler",
"distutils.debug", "distutils.dep_util", "distutils.dir_util",
"distutils.dist", "distutils.errors", "distutils.extension",
"distutils.fancy_getopt", "distutils.file_util", "distutils.filelist",
"distutils.log", "distutils.msvccompiler", "distutils.spawn",
"distutils.sysconfig", "distutils.text_file", "distutils.unixccompiler",
"distutils.util", "distutils.version",
"doctest", "dummy_threading",
"email", "email.charset", "email.contentmanager", "email.encoders",
"email.errors", "email.generator", "email.header", "email.headerregistry",
"email.iterators", "email.message", "email.mime", "email.parser",
"email.policy", "email.utils",
"encodings", "encodings.idna", "encodings.mbcs", "encodings.utf_8_sig",
"ensurepip", "enum", "errno",
"faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "formatter",
"fractions", "ftplib", "functools",
"gc", "getopt", "getpass", "gettext", "glob", "grp", "gzip",
"hashlib", "heapq", "hmac", "html", "html.entities", "html.parser",
"http", "http.client", "http.cookiejar", "http.cookies", "http.server",
"imaplib", "imghdr", "imp", "importlib", "importlib.abc",
"importlib.machinery", "importlib.resources", "importlib.util",
"inspect", "io", "ipaddress", "itertools",
"json", "json.tool",
"keyword",
"lib2to3", "linecache", "locale", "logging", "logging.config",
"logging.handlers", "lzma",
"mailbox", "mailcap", "marshal", "math", "mimetypes", "mmap",
"modulefinder", "msilib", "msvcrt", "multiprocessing",
"multiprocessing.connection", "multiprocessing.dummy",
"multiprocessing.managers", "multiprocessing.pool",
"multiprocessing.shared_memory", "multiprocessing.sharedctypes",
"netrc", "nis", "nntplib", "numbers",
"operator", "optparse", "os", "os.path", "ossaudiodev",
"parser", "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil",
"platform", "plistlib", "poplib", "posix", "pprint", "profile", "pstats",
"pty", "pwd", "py_compile", "pyclbr", "pydoc",
"queue", "quopri",
"random", "re", "readline", "reprlib", "resource", "rlcompleter", "runpy",
"sched", "secrets", "select", "selectors", "shelve", "shlex", "shutil",
"signal", "site", "smtpd", "smtplib", "sndhdr", "socket", "socketserver",
"spwd", "sqlite3", "ssl", "stat", "statistics", "string", "stringprep",
"struct", "subprocess", "sunau", "symbol", "symtable", "sys", "sysconfig",
"syslog",
"tabnanny", "tarfile", "telnetlib", "tempfile", "termios", "test",
"test.support", "test.support.script_helper", "textwrap", "threading",
"time", "timeit", "tkinter", "tkinter.scrolledtext", "tkinter.tix",
"tkinter.ttk", "token", "tokenize", "trace", "traceback", "tracemalloc",
"tty", "turtle", "turtledemo", "types", "typing",
"unicodedata", "unittest", "unittest.mock", "urllib", "urllib.error",
"urllib.parse", "urllib.request", "urllib.response", "urllib.robotparser",
"uu", "uuid",
"venv",
"warnings", "wave", "weakref", "webbrowser", "winreg", "winsound",
"wsgiref", "wsgiref.handlers", "wsgiref.headers", "wsgiref.simple_server",
"wsgiref.util", "wsgiref.validate",
"xdrlib", "xml", "xml.dom", "xml.dom.minidom", "xml.dom.pulldom",
"xml.etree.ElementTree", "xml.parsers.expat", "xml.parsers.expat.errors",
"xml.parsers.expat.model", "xml.sax", "xml.sax.handler",
"xml.sax.saxutils", "xml.sax.xmlreader", "xmlrpc", "xmlrpc.client",
"xmlrpc.server",
"zipapp", "zipfile", "zipimport", "zlib"
]
"""
# Additional multi-character source fragments that get_tokens() appends to
# the token list verbatim, one token per entry.
EXTRAS = [
    "print(",
    "try:",
    "except:",
    "else:",
    "if 1:",
    "while False:",
]
def get_tokens():
    """Collect the raw (unescaped) token strings built into this script.

    The result is the concatenation, in order, of:

    * every Python keyword (``keyword.kwlist``),
    * the augmented-assignment operators parsed out of ``ASSIGN_OPS``,
    * every name defined in the ``builtins`` module,
    * each individual character of ``OPS``,
    * the entries of ``MULTI_OPS``,
    * the module names decoded from the JSON text in ``MODS``,
    * the entries of ``EXTRAS``.

    Returns:
        list[str]: the tokens; duplicates are possible (for example ``OPS``
        repeats ``/``), callers that need uniqueness should build a set.

    Raises:
        json.JSONDecodeError: if ``MODS`` is not valid JSON.
        ValueError / SyntaxError: if an ``ASSIGN_OPS`` piece is not a valid
            Python literal.
    """
    # ``__builtins__`` is a CPython implementation detail: it is the builtins
    # module inside ``__main__`` but a plain dict inside any imported module,
    # where ``__builtins__.__dict__`` raises AttributeError.  Importing the
    # ``builtins`` module gives the same namespace in both situations.
    import builtins

    tokens = []
    tokens.extend(keyword.kwlist)
    # Each piece looks like "'+='" (possibly padded with whitespace/newlines);
    # literal_eval turns that quoted form into the bare operator string.
    tokens.extend(ast.literal_eval(op.strip()) for op in ASSIGN_OPS.split(" | "))
    tokens.extend(vars(builtins))
    # Extending with a str adds one token per character.
    tokens.extend(OPS)
    tokens.extend(MULTI_OPS)
    tokens.extend(json.loads(MODS))
    tokens.extend(EXTRAS)
    return tokens
# Byte values that encode() copies through unchanged: ASCII letters, digits,
# space, underscore, hyphen, dot and colon.  Every other byte is escaped.
ALLOWED = "".join([string.ascii_letters, string.digits, " _-.:"]).encode('latin1')
def encode_byte(b):
    r"""Return the ``\xNN`` escape for a single byte value, as bytes.

    Args:
        b (int): byte value in ``range(256)``.

    Returns:
        bytes: a backslash, the letter ``x`` and exactly two lowercase
        hexadecimal digits, e.g. ``encode_byte(0x22) == b'\\x22'``.
    """
    # The format type must be ``x`` (hexadecimal).  ``g`` is the general
    # *decimal* number format, which produced e.g. ``\x34`` for byte 0x22 and
    # three digits for any value >= 100.
    return (f'\\x{b:02x}').encode('latin1')
def encode(x):
    """Escape the string *x* into bytes using only ``ALLOWED`` bytes and escapes.

    *x* is encoded as latin-1; each resulting byte is kept as-is when it is
    a member of ``ALLOWED`` and otherwise replaced by whatever
    ``encode_byte`` returns for it.

    Args:
        x (str): text to escape; must be representable in latin-1, otherwise
            ``str.encode`` raises ``UnicodeEncodeError``.

    Returns:
        bytes: the escaped form of *x*.
    """
    pieces = []
    for byte in x.encode('latin1'):
        if byte in ALLOWED:
            # Iterating bytes yields ints; wrap back into a 1-byte object.
            pieces.append(bytes([byte]))
        else:
            pieces.append(encode_byte(byte))
    return b"".join(pieces)
def main():
    """Print every known token, one per line, in sorted order.

    Steps:

    1. Start from the unique tokens returned by ``get_tokens()``.
    2. Add the strings stored in ``toplevel.txt``, which must contain a
       single Python literal that is an iterable of strings (it is parsed
       with ``ast.literal_eval``).
    3. Escape each token with ``encode()`` and wrap it in double quotes.
    4. Add every line of ``env/autodict.txt`` (whitespace-stripped) as-is;
       these lines are neither escaped nor quoted here, so they are
       presumably already in the final output format -- confirm against
       whatever produces that file.
    5. Print the combined set, sorted.

    Both input paths are relative to the current working directory.

    Raises:
        OSError: if either input file cannot be opened.
        ValueError / SyntaxError: if ``toplevel.txt`` is not a valid literal.
    """
    tokens = set(get_tokens())
    with open("toplevel.txt", "r") as fh:
        tokens.update(ast.literal_eval(fh.read()))
    tokens = {f'"{encode(t).decode()}"' for t in tokens}
    # Use a context manager so the handle is closed deterministically rather
    # than whenever the anonymous file object happens to be collected.
    with open("env/autodict.txt") as fh:
        tokens.update(line.strip() for line in fh)
    for tok in sorted(tokens):
        print(tok)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment