Skip to content

Instantly share code, notes, and snippets.

@jgarciabu
Created October 5, 2018 14:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jgarciabu/93e2c8c86d526fefbd2d2d0a16ef9f14 to your computer and use it in GitHub Desktop.
Save jgarciabu/93e2c8c86d526fefbd2d2d0a16ef9f14 to your computer and use it in GitHub Desktop.
Directory Search Tool (With GUI)
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 27 14:03:28 2018
@author: jeffrey.garcia
"""
import wx
import pandas as pd
import os
from os import listdir
import inspect
import zipfile
import glob as gb
import xlrd
import PyPDF2
import re
import wx.lib.scrolledpanel
import wx.lib.dialogs
import sys
class SearchTool(wx.Frame):
    """Directory search tool with a minimal wxPython front end.

    While the frame is being constructed the user is asked for a
    pipe-delimited list of search terms. Every file that sits in the same
    directory as this script is then searched (Excel cells, PDF page text,
    ZIP members, CSV/TXT/DAT/TAB lines and the file names themselves) and
    the combined report is shown in a scrollable dialog.

    All content matching is a case-insensitive literal substring test.
    A file that cannot be read is reported inside the results instead of
    aborting the whole search.
    """

    # Separator lines reused by every banner and result entry.
    _STARS = "***************************************************************"
    _DASHES = "---------------------------------------------------------------"
    _RULE = "------------------------------------------"

    # Extensions (lower case, with the dot) handled by each searcher.
    _TEXT_EXTENSIONS = ('.csv', '.txt', '.dat', '.tab')
    _EXCEL_EXTENSIONS = ('.xls', '.xlsx')

    def __init__(self, *args, **kwargs):
        """Prompt for search terms, run the search and show the results.

        Args:
            *args: Forwarded unchanged to ``wx.Frame``.
            **kwargs: Forwarded unchanged to ``wx.Frame``.
        """
        super(SearchTool, self).__init__(*args, **kwargs)
        # The directory holding this script is the directory that is searched.
        self.filename = inspect.getframeinfo(inspect.currentframe()).filename
        self.path = os.path.dirname(os.path.abspath(self.filename))
        self.firstfilenamelist = listdir(self.path)
        # Start with "no terms" so cancelling the prompt cannot leave the
        # attribute undefined.
        self.search_terms_list = []
        prompt = wx.TextEntryDialog(None, "Enter your search terms as a pipe delimited list", 'Search Term Gathering', 'Enter Search Terms Here')
        try:
            if prompt.ShowModal() == wx.ID_OK:
                self.search_terms_list = self._parse_terms(prompt.GetValue())
        finally:
            # Dialogs are top-level windows; an undestroyed one would keep
            # the application alive after the frame is closed.
            prompt.Destroy()
        self.myPanel = wx.Panel(self)
        self.basicGUI(self.search_terms_list)
        self.Bind(wx.EVT_CLOSE, self.OnClose)

    @staticmethod
    def _parse_terms(raw_terms):
        """Split pipe-delimited input into stripped, non-empty terms.

        Empty terms are dropped because an empty string is a substring of
        everything and would report every file as a match.
        """
        return [term.strip() for term in raw_terms.split("|") if term.strip()]

    @classmethod
    def _banner(cls, *lines):
        """Return ``lines`` framed by the star/dash rules (no trailing newline)."""
        return "\n".join((cls._STARS, cls._DASHES) + lines + (cls._DASHES, cls._STARS))

    @classmethod
    def _entry(cls, *lines):
        """Return one result entry: ``lines`` between two dashed rules."""
        return "".join(line + "\n" for line in (cls._RULE,) + lines + (cls._RULE,))

    @classmethod
    def _error_entry(cls, name, error):
        """Return a result entry saying that ``name`` could not be searched."""
        return cls._entry("ERROR: {} could not be searched ({})".format(name, error))

    @staticmethod
    def _collate(entries):
        """Join entries into one string, dropping duplicates in first-seen order."""
        return "".join(dict.fromkeys(entries))

    @staticmethod
    def _terms_in_lines(lines, needles):
        """Return the terms that occur in at least one of ``lines``.

        Args:
            lines: Iterable of ``str`` or ``bytes`` lines.
            needles: Mapping of term -> upper-cased needle of the same type
                as the lines.

        Returns:
            List of matching terms, in the order they were first found.
            Reading stops as soon as every term has been found.
        """
        pending = dict(needles)
        found = []
        for line in lines:
            if not pending:
                break
            upper_line = line.upper()
            for term in [t for t, needle in pending.items() if needle in upper_line]:
                found.append(term)
                del pending[term]
        return found

    def basicGUI(self, search_terms_list):
        """Show the welcome banner, run every searcher and display the report.

        Args:
            search_terms_list: Terms to look for in file names and in
                ZIP/text contents. The Excel and PDF searchers read
                ``self.search_terms_list`` directly.
        """
        wx.StaticText(
            self.myPanel,
            wx.ID_ANY,
            self._banner(
                "THANKS FOR USING THE SEARCH TOOL",
                "FOR SUGGESTIONS AND IDEAS",
                "PLEASE EMAIL jeffrey.garcia@tibersoft.com",
            ),
            style=wx.ALIGN_LEFT,
        )
        if not search_terms_list:
            # Prompt cancelled or only blank terms: nothing to search for.
            self.InfoText(self.myPanel, "No search terms were entered; nothing was searched.")
            return
        excel_results = self.excelWalkThrough()
        pdf_results = self.pdfWalkThrough()
        generic_results = self.genericWalkThrough(search_terms_list)
        filename_results = self.filenameWalkThrough(search_terms_list)
        all_results = (
            self._banner("DETAILED EXCEL SPREADSHEET MATCHES (IF ANY)") + "\n"
            + excel_results + "\n"
            + self._banner("PDF FILE MATCHES (IF ANY)") + "\n"
            + pdf_results + "\n"
            + self._banner("NON-EXCEL/NON-PDF FILE SEARCH RESULTS (IF ANY)") + "\n"
            + generic_results + filename_results
        )
        self.InfoText(self.myPanel, all_results)

    def InfoText(self, parent, message, caption='Results!'):
        """Show ``message`` in a modal, scrollable 800x600 dialog.

        Args:
            parent: Window that owns the dialog.
            message: Text to display.
            caption: Dialog title.
        """
        dlg = wx.lib.dialogs.ScrolledMessageDialog(parent, message, caption, size=(800, 600))
        try:
            dlg.ShowModal()
        finally:
            dlg.Destroy()

    def filenameWalkThrough(self, search_terms_list):
        """Report files in the script directory whose name contains a term.

        Args:
            search_terms_list: Terms to look for.

        Returns:
            str: Concatenated result entries ("" when nothing matched).
        """
        entries = []
        # Escape both parts so glob metacharacters in the directory or in a
        # term ("[", "?", "*") are matched literally.
        directory = gb.escape(self.path)
        for word in search_terms_list:
            pattern = os.path.join(directory, '*' + gb.escape(word) + '*')
            for match in gb.glob(pattern):
                entries.append(self._entry("{}: {} (In Filename)".format(word, os.path.basename(match))))
        return self._collate(entries)

    def genericWalkThrough(self, search_terms_list):
        """Search ZIP archives and CSV/TXT/DAT/TAB files for the terms.

        Args:
            search_terms_list: Terms to look for.

        Returns:
            str: Concatenated result entries ("" when nothing matched).
            Unreadable files contribute an ERROR entry.
        """
        entries = []
        for name in self.firstfilenamelist:
            lowered = name.lower()
            full_path = os.path.join(self.path, name)
            try:
                if lowered.endswith('.zip'):
                    entries.extend(self._search_zip(full_path, name, search_terms_list))
                elif lowered.endswith(self._TEXT_EXTENSIONS):
                    entries.extend(self._search_text(full_path, name, search_terms_list))
            except Exception as error:
                # Best-effort boundary: a corrupt archive or unreadable file
                # is reported and the remaining files are still searched.
                entries.append(self._error_entry(name, error))
        return self._collate(entries)

    def _search_zip(self, full_path, name, search_terms_list):
        """Return entries for terms found in one ZIP's member names or contents."""
        entries = []
        name_needles = {word: word.upper() for word in search_terms_list}
        # Member data is read as bytes, so the needles are encoded first.
        byte_needles = {word: word.encode().upper() for word in search_terms_list}
        with zipfile.ZipFile(full_path) as archive:
            for member in archive.infolist():
                member_name = member.filename.upper()
                for word, needle in name_needles.items():
                    if needle in member_name:
                        entries.append(self._entry("{}: {} (In Filename of Zipped File)".format(word, name)))
                with archive.open(member, mode='r') as stream:
                    for word in self._terms_in_lines(stream, byte_needles):
                        entries.append(self._entry("{}: {} (Inside File)".format(word, name)))
        return entries

    def _search_text(self, full_path, name, search_terms_list):
        """Return entries for terms found in the lines of one text file."""
        needles = {word: word.upper() for word in search_terms_list}
        # errors='replace' keeps a stray undecodable byte from ending the search.
        with open(full_path, 'r', errors='replace') as handle:
            found = self._terms_in_lines(handle, needles)
        return [self._entry("{}: {} (Inside File)".format(word, name)) for word in found]

    def excelWalkThrough(self):
        """Search every cell of every sheet of the Excel files for the terms.

        Returns:
            str: Concatenated result entries ("" when nothing matched).
            Unreadable workbooks contribute an ERROR entry.
        """
        entries = []
        for name in self.firstfilenamelist:
            if not name.lower().endswith(self._EXCEL_EXTENSIONS):
                continue
            try:
                # Open the workbook once per file rather than once per term
                # and sheet.
                # NOTE(review): xlrd 2.x reads only .xls; with that version
                # .xlsx files end up as ERROR entries - confirm installed version.
                book = xlrd.open_workbook(os.path.join(self.path, name))
                for sheet in book.sheets():
                    entries.extend(self._search_sheet(name, sheet))
            except Exception as error:
                # Best-effort boundary: report the file and keep going.
                entries.append(self._error_entry(name, error))
        return self._collate(entries)

    def _search_sheet(self, name, sheet):
        """Return one entry per (cell, term) match in an xlrd sheet."""
        needles = [(word, word.upper()) for word in self.search_terms_list]
        entries = []
        for row in range(sheet.nrows):
            for column in range(sheet.ncols):
                value = sheet.cell(row, column).value
                upper_value = str(value).upper()
                for word, needle in needles:
                    if needle in upper_value:
                        entries.append(self._entry(
                            "File Type: Excel",
                            "Search Term: {}".format(word),
                            "Filename: {}".format(name),
                            "Sheetname: {}".format(sheet.name),
                            # Rows and columns are reported 1-based.
                            "Row: {}".format(row + 1),
                            "Column: {}".format(column + 1),
                            "Value: {}".format(value),
                        ))
        return entries

    def pdfWalkThrough(self):
        """Search the extracted text of every page of the PDF files.

        Terms are matched as case-insensitive literal text, like the other
        searchers; they are not interpreted as regular expressions.

        Returns:
            str: Concatenated result entries ("" when nothing matched).
            Unreadable PDFs contribute an ERROR entry.
        """
        needles = [(word, word.upper()) for word in self.search_terms_list]
        entries = []
        for name in self.firstfilenamelist:
            if not name.lower().endswith('.pdf'):
                continue
            try:
                with open(os.path.join(self.path, name), 'rb') as pdf_file:
                    reader = PyPDF2.PdfFileReader(pdf_file)
                    for page_index in range(reader.getNumPages()):
                        page_text = (reader.getPage(page_index).extractText() or "").upper()
                        for word, needle in needles:
                            if needle in page_text:
                                entries.append(self._entry(
                                    "File Type: PDF",
                                    "Search Term: {}".format(word),
                                    "Filename: {}".format(name),
                                    "Page Number: {}".format(page_index + 1),
                                ))
            except Exception as error:
                # Best-effort boundary: report the file and keep going.
                entries.append(self._error_entry(name, error))
        return self._collate(entries)

    def OnClose(self, event):
        """Handle the window-close event by destroying the frame.

        Destroying the last top-level window lets ``MainLoop`` return, so
        the process ends with a normal (zero) exit status.
        """
        self.Destroy()
# Script entry: create the wx application object, build the search window
# (its constructor prompts for the terms and shows the results), make the
# window visible and run the event loop until it is closed.
app = wx.App()
frame = SearchTool(None, wx.ID_ANY, size=(340, 160))
frame.Show()
app.MainLoop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment