Created
June 29, 2020 21:55
-
-
Save stestagg/0c0bee3556b878c2f322f6530fcc24a5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ast
import builtins
import json
import keyword
import string
# Augmented-assignment operators, each written as a quoted literal and
# separated by " | ". get_tokens() splits on " | " and unquotes every piece
# with ast.literal_eval, so the text between the triple quotes must contain
# nothing but those quoted operators.
ASSIGN_OPS = """
'+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//='
"""

# Single-character operators and brackets; get_tokens() extends its list with
# this string, which adds each character as a separate token.
OPS = "*|^&+-@/%/()[]~"

# Operators that are longer than one character.
MULTI_OPS = ['<<', '>>', '//', '**', '...']
# JSON array of dotted module names, parsed with json.loads in get_tokens().
# NOTE(review): this looks like the standard-library module index -- confirm
# where the list was generated from before refreshing it.
#
# The value is a single JSON line wrapped in leading/trailing newlines. It is
# assembled from adjacent string literals so that no raw line break can end up
# inside a JSON string (which json.loads rejects).
MODS = (
    '\n['
    '"__future__","__main__","_dummy_thread","_thread",'
    '"abc","aifc","argparse","array","ast","asynchat","asyncio","asyncore","atexit","audioop",'
    '"base64","bdb","binascii","binhex","bisect","builtins","bz2",'
    '"calendar","cgi","cgitb","chunk","cmath","cmd","code","codecs","codeop",'
    '"collections","collections.abc","colorsys","compileall","concurrent","concurrent.futures",'
    '"configparser","contextlib","contextvars","copy","copyreg","cProfile","crypt","csv","ctypes",'
    '"curses","curses.ascii","curses.panel","curses.textpad",'
    '"dataclasses","datetime","dbm","dbm.dumb","dbm.gnu","dbm.ndbm","decimal","difflib","dis",'
    '"distutils","distutils.archive_util","distutils.bcppcompiler","distutils.ccompiler","distutils.cmd",'
    '"distutils.command","distutils.command.bdist","distutils.command.bdist_dumb","distutils.command.bdist_msi",'
    '"distutils.command.bdist_packager","distutils.command.bdist_rpm","distutils.command.bdist_wininst",'
    '"distutils.command.build","distutils.command.build_clib","distutils.command.build_ext",'
    '"distutils.command.build_py","distutils.command.build_scripts","distutils.command.check",'
    '"distutils.command.clean","distutils.command.config","distutils.command.install",'
    '"distutils.command.install_data","distutils.command.install_headers","distutils.command.install_lib",'
    '"distutils.command.install_scripts","distutils.command.register","distutils.command.sdist",'
    '"distutils.core","distutils.cygwinccompiler","distutils.debug","distutils.dep_util","distutils.dir_util",'
    '"distutils.dist","distutils.errors","distutils.extension","distutils.fancy_getopt","distutils.file_util",'
    '"distutils.filelist","distutils.log","distutils.msvccompiler","distutils.spawn","distutils.sysconfig",'
    '"distutils.text_file","distutils.unixccompiler","distutils.util","distutils.version",'
    '"doctest","dummy_threading",'
    '"email","email.charset","email.contentmanager","email.encoders","email.errors","email.generator",'
    '"email.header","email.headerregistry","email.iterators","email.message","email.mime","email.parser",'
    '"email.policy","email.utils","encodings","encodings.idna","encodings.mbcs","encodings.utf_8_sig",'
    '"ensurepip","enum","errno",'
    '"faulthandler","fcntl","filecmp","fileinput","fnmatch","formatter","fractions","ftplib","functools",'
    '"gc","getopt","getpass","gettext","glob","grp","gzip",'
    '"hashlib","heapq","hmac","html","html.entities","html.parser",'
    '"http","http.client","http.cookiejar","http.cookies","http.server",'
    '"imaplib","imghdr","imp","importlib","importlib.abc","importlib.machinery","importlib.resources",'
    '"importlib.util","inspect","io","ipaddress","itertools",'
    '"json","json.tool","keyword","lib2to3","linecache","locale","logging","logging.config","logging.handlers","lzma",'
    '"mailbox","mailcap","marshal","math","mimetypes","mmap","modulefinder","msilib","msvcrt",'
    '"multiprocessing","multiprocessing.connection","multiprocessing.dummy","multiprocessing.managers",'
    '"multiprocessing.pool","multiprocessing.shared_memory","multiprocessing.sharedctypes",'
    '"netrc","nis","nntplib","numbers","operator","optparse","os","os.path","ossaudiodev",'
    '"parser","pathlib","pdb","pickle","pickletools","pipes","pkgutil","platform","plistlib","poplib","posix",'
    '"pprint","profile","pstats","pty","pwd","py_compile","pyclbr","pydoc",'
    '"queue","quopri","random","re","readline","reprlib","resource","rlcompleter","runpy",'
    '"sched","secrets","select","selectors","shelve","shlex","shutil","signal","site","smtpd","smtplib",'
    '"sndhdr","socket","socketserver","spwd","sqlite3","ssl","stat","statistics","string","stringprep",'
    '"struct","subprocess","sunau","symbol","symtable","sys","sysconfig","syslog",'
    '"tabnanny","tarfile","telnetlib","tempfile","termios","test","test.support","test.support.script_helper",'
    '"textwrap","threading","time","timeit","tkinter","tkinter.scrolledtext","tkinter.tix","tkinter.ttk",'
    '"token","tokenize","trace","traceback","tracemalloc","tty","turtle","turtledemo","types","typing",'
    '"unicodedata","unittest","unittest.mock","urllib","urllib.error","urllib.parse","urllib.request",'
    '"urllib.response","urllib.robotparser","uu","uuid","venv",'
    '"warnings","wave","weakref","webbrowser","winreg","winsound",'
    '"wsgiref","wsgiref.handlers","wsgiref.headers","wsgiref.simple_server","wsgiref.util","wsgiref.validate",'
    '"xdrlib","xml","xml.dom","xml.dom.minidom","xml.dom.pulldom","xml.etree.ElementTree",'
    '"xml.parsers.expat","xml.parsers.expat.errors","xml.parsers.expat.model",'
    '"xml.sax","xml.sax.handler","xml.sax.saxutils","xml.sax.xmlreader",'
    '"xmlrpc","xmlrpc.client","xmlrpc.server","zipapp","zipfile","zipimport","zlib"'
    ']\n'
)
# Extra multi-character snippets that get_tokens() appends after the module
# names.
EXTRAS = [
    'print(',
    'try:',
    'except:',
    'else:',
    'if 1:',
    'while False:',
]
def get_tokens():
    """Collect the raw (unencoded) token strings.

    The result is built, in order, from: Python keywords, the operators
    listed in ASSIGN_OPS, every name in the ``builtins`` module, each single
    character of OPS, the entries of MULTI_OPS, the module names in MODS and
    the snippets in EXTRAS.

    Returns:
        list: token strings; duplicates are not removed here.
    """
    tokens = list(keyword.kwlist)
    # ASSIGN_OPS stores quoted operators separated by " | "; literal_eval
    # turns each quoted piece (e.g. "'+='") into the bare operator string.
    tokens.extend(
        ast.literal_eval(piece.strip()) for piece in ASSIGN_OPS.split(" | ")
    )
    # Use the builtins module rather than `__builtins__`: the latter is a
    # module only in __main__ and a plain dict in imported modules, where
    # `__builtins__.__dict__` raises AttributeError.
    tokens.extend(vars(builtins))
    # Extending with a str adds each character as its own token.
    tokens.extend(OPS)
    tokens.extend(MULTI_OPS)
    tokens.extend(json.loads(MODS))
    tokens.extend(EXTRAS)
    return tokens
# Byte values that encode() copies through unchanged: ASCII letters, digits,
# space, underscore, hyphen, dot and colon. Any other byte is escaped.
ALLOWED = "".join((string.ascii_letters, string.digits, " _-.:")).encode('latin1')
def encode_byte(b):
    r"""Return the ``\xNN`` escape sequence for a single byte value.

    Args:
        b: integer byte value (0-255).

    Returns:
        bytes: a backslash, ``x`` and exactly two lowercase hexadecimal
        digits, e.g. ``encode_byte(40) == b'\\x28'``.
    """
    # The digits after "\x" must be hexadecimal; the "x" presentation type
    # gives that, whereas "g" would print the value in decimal.
    return f'\\x{b:02x}'.encode('latin1')
def encode(x):
    """Encode the string *x* as bytes, escaping every byte not in ALLOWED.

    *x* is first encoded as latin-1. Bytes found in ALLOWED are copied
    through unchanged; every other byte is replaced by the output of
    encode_byte().

    Returns:
        bytes: the concatenated result.
    """
    pieces = []
    for value in x.encode('latin1'):
        if value in ALLOWED:
            pieces.append(bytes([value]))
        else:
            pieces.append(encode_byte(value))
    return b"".join(pieces)
def main():
    """Print the sorted, de-duplicated token list, one entry per line.

    Tokens come from get_tokens() plus the Python literal stored in
    ``toplevel.txt``; each one is escaped with encode() and wrapped in double
    quotes. The stripped lines of ``env/autodict.txt`` are then added as-is
    (they are not re-encoded or quoted here) before everything is sorted and
    printed.
    """
    tokens = set(get_tokens())
    with open("toplevel.txt", "r") as fh:
        tokens.update(ast.literal_eval(fh.read()))

    quoted = {f'"{encode(t).decode()}"' for t in tokens}

    # Read through a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    with open("env/autodict.txt") as fh:
        quoted.update(line.strip() for line in fh)

    for tok in sorted(quoted):
        print(tok)
# Run main() only when this file is executed as a script, not when it is
# imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment