Skip to content

Instantly share code, notes, and snippets.

@daniel-sc
Last active June 16, 2016 08:53
Show Gist options
  • Save daniel-sc/a43618a187faca803c931683b262af04 to your computer and use it in GitHub Desktop.
Save daniel-sc/a43618a187faca803c931683b262af04 to your computer and use it in GitHub Desktop.
Build a generic (transitive) reference tree for any source files.
import re
import argparse
import os
import pprint
import mimetypes
# Default reference regex: a quoted string that ends in a known web-asset
# extension. Written as a raw string so the backslashes reach the regex engine
# unchanged (the non-raw form relied on invalid escapes such as "\]" and "\.").
DEFAULT_PATTERN = r'''['"]([^'":[\]]+\.(?:png|jpg|gif|xml|html?|php|js))['"]'''


def build_parser():
    """Return the argparse parser describing the command-line interface."""
    parser = argparse.ArgumentParser(description='Listing transitive references.')
    parser.add_argument('start', nargs='+', help='Starting file(s).')
    parser.add_argument('--pattern', default=DEFAULT_PATTERN,
                        help='Regex for matching references. '
                             'Should include one matching group for the complete relative reference.')
    parser.add_argument('--text_files', nargs='+', default=[],
                        help='List of non standard text file extensions that should be considered. E.g.: "myext".')
    parser.add_argument('-v', help='Verbose.', default=False, action="store_true")
    return parser


def collect_references(start_files, pattern, verbose=False):
    """Follow references transitively, starting from ``start_files``.

    Each file is scanned line by line with ``pattern`` (a compiled regex);
    every match is resolved relative to the directory of the file that
    contains it. Referenced files that exist on disk are queued for scanning
    in turn; missing ones are reported with a warning on stdout. Files whose
    guessed MIME type does not contain ``'text'`` are recorded but not read.

    Args:
        start_files: Iterable of paths to begin the traversal from.
        pattern: Compiled regular expression whose ``findall`` results are the
            relative references.
        verbose: When true, print a note for every skipped non-text file.

    Returns:
        A ``(ref_map, inverse_ref_map)`` tuple of dicts. ``ref_map`` maps each
        visited file to the set of resolved paths it references;
        ``inverse_ref_map`` maps each referenced path to the set of files
        referencing it.
    """
    pending = {os.path.realpath(os.path.abspath(path)) for path in start_files}
    ref_map = dict()
    inverse_ref_map = dict()

    while pending:
        current_file = pending.pop()
        current_dir = os.path.dirname(current_file) + '/'
        # Registering the file before scanning marks it as visited, so cyclic
        # references cannot queue it a second time.
        ref_map[current_file] = set()

        mime_type, encoding = mimetypes.guess_type(current_file, False)
        if mime_type is None or 'text' not in mime_type:
            if verbose:
                print('Skipped binary file (mimetype=' + (mime_type or 'None') + ', encoding=' + (encoding or 'None') + ') ' + current_file )
            continue

        # ascii + surrogateescape lets arbitrary bytes decode without errors.
        with open(current_file, 'r', encoding="ascii", errors="surrogateescape") as f:
            for line in f:
                for match in pattern.findall(line):
                    # Plain concatenation (not os.path.join) keeps references
                    # with a leading slash relative to current_dir.
                    referenced_file = os.path.realpath(os.path.abspath(current_dir + match))
                    ref_map[current_file].add(referenced_file)
                    if referenced_file not in ref_map:
                        if os.path.isfile(referenced_file):
                            pending.add(referenced_file)
                        else:
                            print('WARN found broken reference: ' + match + ' in file: ' + current_file)
                    inverse_ref_map.setdefault(referenced_file, set()).add(current_file)

    return ref_map, inverse_ref_map


def main(argv=None):
    """Parse arguments, walk the reference graph and print both mappings.

    Args:
        argv: Optional argument list; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # Register extra extensions as text/* so they pass the MIME-type filter.
    for ext in args.text_files:
        mimetypes.add_type('text/' + ext, '.' + ext)

    ref_map, inverse_ref_map = collect_references(
        args.start, re.compile(args.pattern), args.v)

    print("\nReferences: ")
    pprint.pprint(ref_map)
    print("\nInverse References: ")
    pprint.pprint(inverse_ref_map)
    print("\nTotal files visited: " + str(len(ref_map)))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment