Last active
June 16, 2016 08:53
-
-
Save daniel-sc/a43618a187faca803c931683b262af04 to your computer and use it in GitHub Desktop.
Build a generic (transitive) reference tree for any source files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import argparse | |
import os | |
import pprint | |
import mimetypes | |
# Default reference regex: a quoted relative path ending in a known extension.
# Written as a raw string so the regex escapes (\. and \]) are not parsed as
# (invalid) string escape sequences.
DEFAULT_PATTERN = r"""['"]([^'":[\]]+\.(?:png|jpg|gif|xml|html?|php|js))['"]"""


def build_parser():
    """Return the argparse parser describing the command-line interface."""
    parser = argparse.ArgumentParser(description='Listing transitive references.')
    parser.add_argument('start', nargs='+', help='Starting file(s).')
    parser.add_argument(
        '--pattern', default=DEFAULT_PATTERN,
        help='Regex for matching references. '
             'Should include one matching group for the complete relative reference.')
    parser.add_argument(
        '--text_files', nargs='+', default=[],
        help='List of non standard text file extensions that should be considered. E.g.: "myext".')
    parser.add_argument('-v', help='Verbose.', default=False, action="store_true")
    return parser


def collect_references(start_files, pattern, verbose=False):
    """Follow references transitively, starting from ``start_files``.

    Args:
        start_files: Iterable of paths to begin the traversal at.
        pattern: Regex (string or compiled) with exactly one group capturing
            the reference, interpreted relative to the referencing file.
        verbose: When true, report files skipped because their guessed
            mimetype is not a text type.

    Returns:
        A ``(ref_map, inverse_ref_map)`` tuple of dicts. ``ref_map`` maps each
        visited file to the set of resolved paths it references;
        ``inverse_ref_map`` maps each referenced path to the set of files
        referencing it. All paths are absolute with symlinks resolved.
    """
    # re.compile returns an already compiled pattern unchanged.
    pattern = re.compile(pattern)
    pending = {os.path.realpath(os.path.abspath(path)) for path in start_files}
    ref_map = {}
    inverse_ref_map = {}

    while pending:
        current_file = pending.pop()
        # Plain concatenation (not os.path.join) on purpose: a reference with
        # a leading slash still resolves beneath the referencing file's
        # directory instead of replacing it.
        current_dir = os.path.dirname(current_file) + '/'
        ref_map[current_file] = set()

        mime_type, encoding = mimetypes.guess_type(current_file, strict=False)
        if mime_type is None or 'text' not in mime_type:
            if verbose:
                print('Skipped binary file (mimetype=' + (mime_type or 'None')
                      + ', encoding=' + (encoding or 'None') + ') ' + current_file)
            continue

        try:
            # surrogateescape lets non-ASCII bytes pass through without
            # raising a decode error.
            handle = open(current_file, 'r', encoding="ascii", errors="surrogateescape")
        except OSError as err:
            # A missing or unreadable start file must not abort the whole run.
            print('WARN could not read file: ' + current_file + ' (' + str(err) + ')')
            continue

        with handle:
            for line in handle:
                for match in pattern.findall(line):
                    referenced_file = os.path.realpath(os.path.abspath(current_dir + match))
                    ref_map[current_file].add(referenced_file)
                    if referenced_file not in ref_map:
                        if os.path.isfile(referenced_file):
                            pending.add(referenced_file)
                        else:
                            print('WARN found broken reference: ' + match
                                  + ' in file: ' + current_file)
                    inverse_ref_map.setdefault(referenced_file, set()).add(current_file)

    return ref_map, inverse_ref_map


def main(argv=None):
    """Parse ``argv`` (default: ``sys.argv[1:]``), run the traversal, print both maps."""
    args = build_parser().parse_args(argv)
    pattern = re.compile(args.pattern)

    # Register the extra extensions as text types so they pass the mimetype
    # filter in collect_references.
    for ext in args.text_files:
        mimetypes.add_type('text/' + ext, '.' + ext)

    ref_map, inverse_ref_map = collect_references(args.start, pattern, args.v)

    print("\nReferences: ")
    pprint.pprint(ref_map)
    print("\nInverse References: ")
    pprint.pprint(inverse_ref_map)
    print("\nTotal files visited: " + str(len(ref_map)))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment