Created
May 29, 2014 15:20
-
-
Save kitsuyui/11cb65ef4f4630432bfa to your computer and use it in GitHub Desktop.
This script counts occurrences of Python reserved keywords and built-in names in Python source files, including files contained in a tgz or zip archive.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf8
import keyword
import os
import tarfile
import tokenize
import zipfile
from collections import Counter

try:
    import __builtin__
except ImportError:
    # Python 3 renamed the module to ``builtins``; keep the old name bound.
    import builtins as __builtin__
def is_python_filepath(pathname):
    """Return True when *pathname* ends with the exact extension ``.py``.

    The comparison is case-sensitive and looks only at the final extension,
    so ``mod.pyc`` and ``MOD.PY`` are both rejected.
    """
    _, extension = os.path.splitext(pathname)
    return extension == '.py'
def walkarchive(path, pathname_filter):
    """Yield an open file object for every matching file found at *path*.

    *path* is tried, in order, as a directory (walked recursively), a tar
    archive (any compression ``tarfile`` detects), a zip archive, and
    finally a single plain file.

    Args:
        path: Filesystem path to inspect.
        pathname_filter: Callable that receives a name and returns a true
            value for entries that should be yielded. Directory walks pass
            the bare file name, archives pass the member name, and the
            plain-file case passes *path* itself.

    Yields:
        Readable file objects. Directory and plain-file entries come from
        ``open(..., 'r')``; archive members are whatever
        ``TarFile.extractfile`` / ``ZipFile.open`` return. Closing each
        yielded object is left to the caller. Tar archives are read as a
        forward-only stream, so each member should be consumed before the
        next one is requested.
    """
    if os.path.isdir(path):
        for root, _, filenames in os.walk(path):
            for name in filenames:
                if pathname_filter(name):
                    yield open(os.path.join(root, name), 'r')
    elif tarfile.is_tarfile(path):
        # The ``with`` block closes the archive once iteration is finished
        # (or the generator is closed) instead of leaking the handle.
        with tarfile.open(path, 'r|*') as tf:
            for info in tf:
                if not pathname_filter(info.name):
                    continue
                member = tf.extractfile(info)
                # extractfile() returns None for members that are not
                # regular files or links (e.g. a directory named "x.py").
                if member is not None:
                    yield member
    elif zipfile.is_zipfile(path):
        with zipfile.ZipFile(path) as zf:
            for name in zf.namelist():
                if pathname_filter(name):
                    yield zf.open(name)
    elif pathname_filter(path):
        yield open(path, 'r')
def tokens_without_whitespaces(infile):
    """Yield the text of each token in *infile*, skipping blank ones.

    *infile* only needs a ``readline`` method; it is fed to
    ``tokenize.generate_tokens``. Each token string is stripped of
    surrounding whitespace, and tokens that end up empty (newlines,
    indents, dedents, the end marker) are dropped.
    """
    for token_info in tokenize.generate_tokens(infile.readline):
        # Index 1 of every token tuple is the token's source text.
        stripped = token_info[1].strip()
        if not stripped:
            continue
        yield stripped
def keywords():
    """Return a tuple of every reserved keyword and built-in name.

    The tuple holds ``keyword.kwlist`` followed by ``dir()`` of the
    built-ins module, so it contains functions, exceptions and constants
    as well as keywords. Entries are not de-duplicated.

    The built-ins module is resolved here rather than taken from module
    scope so the function works on both Python 2 (``__builtin__``) and
    Python 3 (``builtins``).
    """
    try:
        import __builtin__ as builtins_module
    except ImportError:
        import builtins as builtins_module
    return tuple(keyword.kwlist + dir(builtins_module))
def tokens_reserved_only(infile):
    """Yield only the tokens of *infile* that are keywords or built-in names.

    Tokens come from ``tokens_without_whitespaces`` and are kept when they
    appear in ``keywords()``. Order and repetitions are preserved.
    """
    # Build the lookup once as a set: the name list has a few hundred
    # entries and would otherwise be scanned linearly for every token.
    reserved = frozenset(keywords())
    for token in tokens_without_whitespaces(infile):
        if token in reserved:
            yield token
def counting_archive(filepath):
    """Count keyword and built-in name tokens in the Python files at *filepath*.

    Args:
        filepath: Path handed to ``walkarchive`` (a directory, tar archive,
            zip archive, or single file); only entries accepted by
            ``is_python_filepath`` are tokenized.

    Returns:
        A ``Counter`` mapping each reserved token to its occurrence count.
    """
    counts = Counter()
    for source in walkarchive(filepath, is_python_filepath):
        try:
            counts.update(tokens_reserved_only(source))
        finally:
            # walkarchive hands over open handles; release each one as soon
            # as it has been tokenized instead of leaking it.
            source.close()
    return counts
if __name__ == '__main__':
    import argparse

    # One or more paths are required on the command line.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('filepath', nargs='+', help='archive filepaths.')
    args = arg_parser.parse_args()

    # Fold the per-path tallies into a single Counter, one path at a time.
    summary = Counter()
    for path in args.filepath:
        summary = summary + counting_archive(path)

    # Print "token<TAB>count" rows, most frequent first.
    for key, count in summary.most_common():
        print('{}\t{}'.format(key, count))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment