Skip to content

Instantly share code, notes, and snippets.

@crherlihy
Last active August 29, 2015 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save crherlihy/3b3b002dacea00c4667b to your computer and use it in GitHub Desktop.
Save crherlihy/3b3b002dacea00c4667b to your computer and use it in GitHub Desktop.
import unittest
import os, re, sys
import matplotlib.pyplot as plt
#root_dir= "C://Users//Amir//PycharmProjects//untitled"
#keyword = "keyword"
def CountPhraseinFolders(root_dir, keyword):
    """Count, for each folder, the files whose text matches ``keyword``.

    The tree below ``root_dir`` is traversed with ``os.walk``. Every folder
    that directly contains at least one file gets an entry in the result;
    folders that contain no files are left out.

    Args:
        root_dir: Path of the directory tree to search.
        keyword: Regular expression to look for in each file's text, either
            a pattern string or a compiled pattern.

    Returns:
        A dict mapping each folder path (as produced by ``os.walk``) to the
        number of files directly inside that folder containing at least one
        match of ``keyword``.

    Raises:
        Any exception raised by ``open()`` / ``read()`` for a file that
        cannot be read is propagated to the caller.
    """
    counts = {}
    for folder, _subdirs, files in os.walk(root_dir):
        if not files:
            continue
        matching_files = 0
        for name in files:
            with open(os.path.join(folder, name), "r") as fobj:
                text = fobj.read()
            # A single hit is enough to count the file, so there is no need
            # to collect every match.
            if re.search(keyword, text):
                matching_files += 1
        # Store the total once per folder. Resetting the entry while looping
        # over the files would discard the hits of earlier files.
        counts[folder] = matching_files
    return counts
def plot_data(data, keyword, dir):
    """Draw a bar chart of per-folder counts on a new matplotlib figure.

    The figure is only built here; displaying it (``plt.show()``) is left to
    the caller.

    Args:
        data: dict whose keys become the x-axis tick labels and whose values
            become the bar heights.
        keyword: Compiled regular expression; its ``pattern`` is shown in the
            plot title.
        dir: Root directory that was searched; only used in the plot title.

    Raises:
        AssertionError: if ``data`` is not a dict or ``keyword`` is not a
            compiled regular expression.
    """
    # isinstance (instead of comparing exact types) also accepts dict
    # subclasses such as OrderedDict or defaultdict.
    assert isinstance(data, dict), "Input data is not dictionary type."
    assert isinstance(keyword, type(re.compile(''))), (
        "Keyword argument is not\n"
        "a reg expression.")
    positions = range(len(data))
    plt.figure(figsize=(5, 5))
    # Materialise the dict views as lists so matplotlib receives plain
    # sequences on Python 3 as well as Python 2.
    plt.bar(positions, list(data.values()), align='center', width=0.1)
    plt.xticks(positions, list(data.keys()), fontsize=12, rotation=90)
    # Tick labels are rotated by 90 degrees; push the axes up to give them
    # room at the bottom of the figure.
    plt.subplots_adjust(bottom=0.50)
    plt.title(' Folders containing matches of \"%s\" under the root: %s'
              % (keyword.pattern, dir))
    plt.xlabel('Folders', fontsize=15)
    plt.ylabel('Occurrences #', fontsize=15)
if __name__ == '__main__':
    # Pattern to search for ('[a-z0-9]' is another pattern to try).
    regex_str = 'key'
    keyword = re.compile(regex_str)

    # Default search root; point this at any directory.
    root_dir = "C://Users//Amir//PycharmProjects//untitled"

    # Walk the tree and collect the per-folder counts.
    recursive_result = CountPhraseinFolders(root_dir, keyword)

    # TODO: the case where the search returns nothing is not handled, and
    # this whole entry point still needs tests.

    # Show the counts as a bar chart.
    plot_data(recursive_result, keyword, root_dir)
    plt.show()
__author__ = 'Amir'
# -----------------------------------------------------------------------------
# Unit tests for the reg_search.py module.
# The tests build randomly generated directory trees (sub-directories and
# files), plus a few special cases, and check the folder counter in
# reg_search against a set of regular expressions.
# Import Statements -----------------------------------------------------------
import re
import os
import string
import random
import unittest
import shutil
import reg_search as rm
def id_generator(size=6, chars=string.ascii_letters + string.digits):
    """Build a random string of ``size`` characters drawn from ``chars``."""
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def create_directory_system(key):
    """Create a random directory tree for testing and record regex matches.

    A top-level directory with a random name is created in the current
    working directory. It receives 3-5 random text files and 3-5 randomly
    named sub-directories, each of which also receives 3-5 random text
    files. Every file is read back right after it is written and the number
    of matches of ``key`` in it is recorded.

    WARNING: if a directory with one of the generated names already exists,
    it is deleted and re-created.

    Args:
        key: Regular expression (compiled pattern or pattern string) whose
            matches are counted in every generated file.

    Returns:
        A ``(top_dir, result_dict)`` tuple: the name of the top-level
        directory and a dict mapping each generated file path to the number
        of matches of ``key`` found in that file.
    """

    def create_directory(directory):
        """Create ``directory``, replacing it if creation fails at first."""
        try:
            os.mkdir(directory)
        except OSError:
            # Most likely the directory already exists: delete and re-create.
            shutil.rmtree(directory)
            os.mkdir(directory)

    def create_files_and_find_match(path):
        """Write 3-5 random text files into ``path`` and count matches.

        Returns a dict mapping each new file path to the number of matches
        of ``key`` in that file.
        """
        result = {}
        for _ in range(random.randint(3, 5)):
            file_name = id_generator(size=random.randint(4, 6))
            file_path = os.path.join(path, file_name + '.txt')
            # 100 lines, each a random run of 50-100 letters and digits.
            with open(file_path, 'w') as f:
                for _line in range(100):
                    f.write(id_generator(size=random.randint(50, 100)) + '\n')
            # Read the file back and store how often the expression matches.
            with open(file_path, 'r') as f:
                result[file_path] = len(re.findall(key, f.read()))
        return result

    # Create the top directory and its files.
    top_dir = id_generator(size=random.randint(4, 6))
    create_directory(top_dir)
    result_dict = create_files_and_find_match(top_dir)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(result_dict)

    # Create the sub-directories with their files, merging the match counts
    # into the overall result.
    for _ in range(random.randint(3, 5)):
        sub_dir = os.path.join(top_dir, id_generator(size=random.randint(4, 6)))
        create_directory(sub_dir)
        result_dict.update(create_files_and_find_match(sub_dir))

    # Return the name of the top directory and the per-file match counts.
    return top_dir, result_dict
class TestRegexMatches(unittest.TestCase):
    """Unit tests for ``reg_search.CountPhraseinFolders``."""

    def testEmptyRegex(self):
        """A pattern that can never match must give a total count of zero."""
        top_dir, _ = create_directory_system(re.compile(''))
        # Registered immediately so the tree is removed even if the
        # assertion below fails.
        self.addCleanup(shutil.rmtree, top_dir)
        # '(?!)' is an empty negative look-ahead: it fails at every position.
        key = re.compile('(?!)')
        self.assertEqual(sum(rm.CountPhraseinFolders(top_dir, key).values()), 0)

    def testIntersection(self):
        """A character class and its negation must not give equal results.

        The folder holds a single file made up only of the characters
        a, b and c, so '[abc]' matches that file while '[^abc]' cannot.
        Randomly generated files are not used here: practically every
        random file contains characters from both classes, which would make
        the two per-folder results identical.
        """
        top_dir = id_generator(size=8)
        os.mkdir(top_dir)
        self.addCleanup(shutil.rmtree, top_dir)
        with open(os.path.join(top_dir, 'only_abc.txt'), 'w') as handle:
            handle.write('abcabc')
        key1 = re.compile('[abc]')
        key2 = re.compile('[^abc]')
        self.assertNotEqual(rm.CountPhraseinFolders(top_dir, key1),
                            rm.CountPhraseinFolders(top_dir, key2))

    def testDirectorySystem(self):
        """Stochastic self-test of the recursive search.

        For each randomly generated tree, the result of the recursive
        search must agree with the matches recorded while the tree was
        being created.
        """
        # Number of top directories to test
        num_trials = 100
        # Generate possible regex list
        list_regex = [re.compile('[' + id_generator(size=random.randint(4, 6)) + ']' +
                                 '[' + id_generator(size=random.randint(4, 6)) + ']' +
                                 '[' + id_generator(size=random.randint(4, 6)) + ']' +
                                 '[^' + id_generator(size=random.randint(2, 4)) + ']')
                      for _ in range(num_trials)]
        # Run through each directory, testing outputs
        for _ in range(num_trials):
            key = random.choice(list_regex)
            top_dir, result = create_directory_system(key)
            try:
                # create_directory_system reports matches per *file*, while
                # CountPhraseinFolders reports per *folder*; fold the
                # per-file data into "files with at least one match" per
                # folder so that both sides have the same shape.
                expected = {}
                for path, matches in result.items():
                    folder = os.path.normpath(os.path.dirname(path))
                    expected[folder] = expected.get(folder, 0) + (1 if matches else 0)
                self.assertEqual(rm.CountPhraseinFolders(top_dir, key), expected)
            finally:
                # Remove the created directory, also when the check fails.
                shutil.rmtree(top_dir)
if __name__ == "__main__":
    # Executed as a script: discover and run the tests in this module.
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment