Skip to content

Instantly share code, notes, and snippets.

@csm10495
Created October 3, 2016 05:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save csm10495/9551dd8bcd06510cbd868954c920353b to your computer and use it in GitHub Desktop.
Save csm10495/9551dd8bcd06510cbd868954c920353b to your computer and use it in GitHub Desktop.
Quick and Dirty Recursive File Regex Counter
'''
Brief:
This searches a directory recursively (by default the current directory) for a given regex match. The total number of matches is counted and printed at the end.
By default, it looks for lines where the author tag is given but there is no name provided.
Author(s):
Charles Machalow
'''
import os, re
# Pattern for a line containing "Author" and ending in ':' that is followed by
# nothing but whitespace (optionally a ''' or """ delimiter) before the next
# newline, i.e. the author tag is present but no name was filled in.
REGEX = re.compile(r".*Author.*:\n(?:\s*|\s*\'\'\'\s*|\s*\"\"\"\s*)\n")

# Size cutoff in bytes (128 KB); files this large or larger are not scanned.
MAX_FILE_SIZE = 128 * 1024
def searchFolder(folder):
    '''
    Brief:
        Recursively counts the REGEX matches in every regular file under folder.
        Each visited file and folder is printed as it is reached.

    Args:
        folder - path of the directory to scan (relative paths are resolved
                 against the current working directory).

    Returns:
        The total number of REGEX matches found in folder and its subfolders.

    Notes:
        - The working directory is never changed; every entry is addressed by
          its path joined onto folder.
        - Files whose size is MAX_FILE_SIZE or more are listed but not scanned.
        - Files that cannot be read or decoded are reported and skipped.
        - Symlinked directories are not followed (avoids symlink cycles), and
          entries that are neither regular files nor directories are ignored.
        - A folder that cannot be listed due to permissions contributes 0.
    '''
    regexMatches = 0

    try:
        names = os.listdir(folder)
    except PermissionError:
        # Best-effort scan: an unlistable folder simply adds nothing.
        return regexMatches

    for name in names:
        path = os.path.join(folder, name)
        filePath = os.path.abspath(path)

        if os.path.isfile(path):
            try:
                print ('File: %s' % filePath)
            except UnicodeEncodeError:
                # Console can't represent the name; fall back to its UTF-8 bytes.
                print ('File: %s' % filePath.encode('utf8'))

            # Marker line that puts '^' under the file name printed above:
            # pad past the 'File: ' prefix and the directory part of the path.
            padLen = len('File: ') + len(filePath) - len(name)
            marker = ' ' * padLen + '^' * len(name)

            try:
                if os.stat(path).st_size >= MAX_FILE_SIZE:
                    continue
                with open(path, 'r') as f:
                    # Some files have lots of extra null chars... strip them.
                    txt = f.read().strip('\0')
            except UnicodeDecodeError as ex:
                print (marker + ' Skipping File... odd byte found (%s)' % ex.reason)
                continue
            except OSError as ex:
                # Unreadable file (permissions, vanished, ...): skip only this
                # file instead of abandoning the rest of the folder.
                print (marker + ' Skipping File... could not be read (%s)' % ex.strerror)
                continue
            except Exception:
                print (marker + ' Skipping File... odd byte found (Generic Exception)')
                continue

            regexMatches += len(REGEX.findall(txt))

        elif os.path.isdir(path) and not os.path.islink(path):
            print ('Folder: %s' % filePath)
            regexMatches += searchFolder(path)

        # Anything else (broken symlink, socket, FIFO, symlinked directory)
        # is deliberately skipped.

    return regexMatches
if __name__ == '__main__':
    # Script entry point: scan from the current directory and report the total.
    totalMatches = searchFolder('.')
    print ("\nRegex Matches: %d" % totalMatches)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment