Last active
August 29, 2015 14:22
-
-
Save crherlihy/3b3b002dacea00c4667b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
import os, re, sys | |
import matplotlib.pyplot as plt | |
#root_dir= "C://Users//Amir//PycharmProjects//untitled" | |
#keyword = "keyword" | |
def CountPhraseinFolders(root_dir, keyword):
    """Count, for every folder under ``root_dir``, the files matching ``keyword``.

    The tree is walked recursively with ``os.walk`` and each file is read
    as text and searched with the given pattern.

    Args:
        root_dir: Path of the directory tree to search.
        keyword: Regular expression to look for; a pattern string or a
            compiled pattern object.

    Returns:
        A dict mapping each visited folder path that directly contains at
        least one file to the number of those files with one or more
        matches. Folders that hold no files get no entry.
    """
    matches_per_folder = {}
    for root, _dirs, files in os.walk(root_dir):
        for name in files:
            # Register the folder once; re-assigning 0 for every file (as the
            # previous version did) wiped the tally built up by earlier files.
            matches_per_folder.setdefault(root, 0)
            with open(os.path.join(root, name), "r") as fobj:
                text = fobj.read()
            # Only the existence of a match matters for the per-file tally.
            if re.search(keyword, text):
                matches_per_folder[root] += 1
    return matches_per_folder
def plot_data(data, keyword, dir):
    """Draw a bar chart of per-folder counts on a new 5x5 figure.

    The figure is created and decorated but not shown; the caller decides
    when to display it.

    Args:
        data: dict whose keys become the x tick labels and whose values
            become the bar heights.
        keyword: Compiled regular expression; its ``pattern`` is quoted in
            the plot title.
        dir: Root directory that was searched; used only in the title.
            (The name shadows the builtin but is kept for compatibility.)

    Raises:
        AssertionError: If ``data`` is not a dict or ``keyword`` is not a
            compiled regular expression.
    """
    # isinstance (instead of exact type equality) also admits dict
    # subclasses such as OrderedDict or defaultdict.
    assert isinstance(data, dict), "Input data is not dictionary type."
    assert isinstance(keyword, type(re.compile(''))), \
        "Keyword argument is not a reg expression."

    # Materialise the dict views so the plotting calls receive plain
    # sequences on both Python 2 and Python 3.
    folders = list(data.keys())
    counts = list(data.values())
    positions = range(len(data))

    plt.figure(figsize=(5, 5))
    plt.bar(positions, counts, align='center', width=0.1)
    plt.xticks(positions, folders, fontsize=12, rotation=90)
    # Leave room below the axes for the rotated folder labels.
    plt.subplots_adjust(bottom=0.50)
    plt.title(' Folders containing matches of \"%s\" under the root: %s' % (keyword.pattern, dir))
    plt.xlabel('Folders', fontsize=15)
    plt.ylabel('Occurrences #', fontsize=15)
if __name__ == '__main__':
    # Usage: python reg_search.py [root_dir [regex]]
    # Both arguments are optional; without them the script falls back to
    # the defaults it previously had hard-coded.
    cli_args = sys.argv[1:]
    root_dir = cli_args[0] if cli_args else "C://Users//Amir//PycharmProjects//untitled"
    regex_str = cli_args[1] if len(cli_args) > 1 else 'key'  # e.g. '[a-z0-9]'
    keyword = re.compile(regex_str)

    # Search for matches recursively
    recursive_result = CountPhraseinFolders(root_dir, keyword)
    # TODO: the behaviour of this script for an empty result is untested.

    # Plot results in a bar chart
    plot_data(recursive_result, keyword, root_dir)
    plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'Amir' | |
# ----------------------------------------------------------------------------- | |
# This module runs a set of unit tests against the
# reg_search.py module to make sure that it
# behaves correctly.
# It builds randomly generated directory trees
# (sub-directories and files), together with some
# special cases and a set of regular expressions,
# to test the counter in reg_search.
# Import Statements ----------------------------------------------------------- | |
import re | |
import os | |
import string | |
import random | |
import unittest | |
import shutil | |
import reg_search as rm | |
def id_generator(size=6, chars=string.ascii_letters + string.digits):
    """Build a random string of ``size`` characters drawn from ``chars``."""
    picked = []
    for _ in range(size):
        # One independent draw per output character.
        picked.append(random.choice(chars))
    return ''.join(picked)
def create_directory_system(key):
    """Create a random directory tree for testing and record regex matches.

    A randomly named top directory is created relative to the current
    working directory. It receives 3-5 random text files and 3-5 randomly
    named sub-directories, each of which also receives 3-5 random text
    files. Every file holds 100 lines of 50-100 random letters and digits.

    Args:
        key: Regular expression (pattern string or compiled pattern) that
            is matched against the content of every generated file.

    Returns:
        A tuple ``(top_dir, result_dict)``: the name of the top directory
        and a dict mapping each generated file path (joined with ``'/'``)
        to the number of matches of ``key`` found in that file.
    """
    def create_directory(directory):
        """Create ``directory``, replacing one that already exists."""
        try:
            os.mkdir(directory)
        except OSError:
            # mkdir failed, most likely because the directory already
            # exists: delete it and re-create it empty.
            shutil.rmtree(directory)
            os.mkdir(directory)

    def create_files_and_find_match(path):
        """Fill ``path`` with random files; return match counts per file."""
        result = {}
        # Create a random number of files
        for _ in range(random.randint(3, 5)):
            file_path = path + '/' + id_generator(size=random.randint(4, 6)) + '.txt'
            # Write random lines of letters and digits, one per line
            # (the separator used to be the literal text '/n').
            with open(file_path, 'w') as f:
                for _ in range(100):
                    f.write(id_generator(size=random.randint(50, 100)) + '\n')
            # Read the file back and store the number of matches
            with open(file_path, 'r') as f:
                result[file_path] = len(re.findall(key, f.read()))
        return result

    # Create the top directory with its own files and record their matches
    top_dir = id_generator(size=random.randint(4, 6))
    create_directory(top_dir)
    result_dict = create_files_and_find_match(top_dir)
    # Debug output kept from the original; this call form works on
    # Python 2 and Python 3 alike.
    print(result_dict)

    # Create sub-directories with files, merging their matches into the
    # overall result
    for _ in range(random.randint(3, 5)):
        sub_dir = top_dir + '/' + id_generator(size=random.randint(4, 6))
        create_directory(sub_dir)
        result_dict.update(create_files_and_find_match(sub_dir))

    # Return the name of the top directory and the matches result
    return top_dir, result_dict
class TestRegexMatches(unittest.TestCase):
    """Unit tests for ``CountPhraseinFolders`` run on generated directory trees.

    Every test builds its tree with ``create_directory_system`` and removes
    it in a ``finally`` clause, so a failing assertion leaves nothing behind.
    """

    @staticmethod
    def _matching_files_per_folder(result):
        """Reduce per-file match counts to per-folder matching-file counts.

        ``result`` maps file paths to match counts; the returned dict maps
        each normalised folder path to the number of its files whose count
        is greater than zero.
        """
        per_folder = {}
        for file_path, match_count in result.items():
            folder = os.path.normpath(os.path.dirname(file_path))
            per_folder[folder] = per_folder.get(folder, 0) + (1 if match_count > 0 else 0)
        return per_folder

    def testEmptyRegex(self):
        """A pattern that can never match must give a total count of zero."""
        top_dir, _result = create_directory_system(re.compile(''))
        try:
            # '(?!)' is an empty negative look-ahead: it fails everywhere.
            key = re.compile('(?!)')
            self.assertEqual(sum(rm.CountPhraseinFolders(top_dir, key).values()), 0)
        finally:
            shutil.rmtree(top_dir)

    def testIntersection(self):
        """Complementary character classes should not give equal results."""
        top_dir, _result = create_directory_system(re.compile(''))
        try:
            key1 = re.compile('[abc]')
            key2 = re.compile('[^abc]')
            # NOTE(review): if CountPhraseinFolders counts matching files per
            # folder, both patterns presumably match every large random file
            # generated here and the two dicts would be equal -- confirm the
            # granularity this test is meant to check.
            self.assertNotEqual(rm.CountPhraseinFolders(top_dir, key1),
                                rm.CountPhraseinFolders(top_dir, key2))
        finally:
            shutil.rmtree(top_dir)

    def testDirectorySystem(self):
        """Stochastic self-test: the recursive search must be consistent
        with the matches recorded while the tree was created."""
        # Number of top directories to test
        num_trials = 100
        # Generate possible regex list: three random positive character
        # classes followed by one random negated class
        list_regex = [re.compile('[' + id_generator(size=random.randint(4, 6)) + ']' +
                                 '[' + id_generator(size=random.randint(4, 6)) + ']' +
                                 '[' + id_generator(size=random.randint(4, 6)) + ']' +
                                 '[^' + id_generator(size=random.randint(2, 4)) + ']')
                      for _ in range(num_trials)]
        # Run through each directory, testing outputs
        for _ in range(num_trials):
            key = random.choice(list_regex)
            top_dir, result = create_directory_system(key)
            try:
                # The creation-time result is keyed by file, while the search
                # result is compared per folder, so bring both to the same
                # shape (normalised folder path -> number of matching files).
                expected = self._matching_files_per_folder(result)
                actual = dict((os.path.normpath(folder), count) for folder, count
                              in rm.CountPhraseinFolders(top_dir, key).items())
                self.assertEqual(actual, expected)
            finally:
                # Remove created directory
                shutil.rmtree(top_dir)
# Run the unit tests above when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment