Skip to content

Instantly share code, notes, and snippets.

@tyrostone
Created August 23, 2017 21:13
Show Gist options
  • Save tyrostone/7c6054a9dacda349b81c571a3e68fe24 to your computer and use it in GitHub Desktop.
Save tyrostone/7c6054a9dacda349b81c571a3e68fe24 to your computer and use it in GitHub Desktop.
Script to search through S3 for missing company, campaign, and/or email data
import argparse
import csv
import fnmatch
import multiprocessing
import os
import re
import sys
def search_logfiles_for_email(email):
    """Print every syslog line that contains ``email``.

    The directory to scan is read from the module-level ``args.logfile_path``
    and the files to read are found with ``get_email_sending_logfiles``
    (default file name ``syslog``). The address is echoed once up front, then
    each matching line is printed beneath a banner naming the address.

    Args:
        email: Address to look for. It is matched as a literal substring, so
            characters such as ``.`` and ``+`` are not interpreted as regular
            expression syntax.
    """
    logfiles = get_email_sending_logfiles(args.logfile_path)
    print(email)
    banner = "***************************"
    for file_ in logfiles:
        # The context manager closes each log file once it has been scanned.
        with open(file_, "r") as f:
            for line in f:
                # Literal containment: an address like "a+b@x.com" must match
                # itself, which it would not do as a regex pattern.
                if email in line:
                    print(banner)
                    print("match for: {}".format(email))
                    print(banner)
                    print(line)
def get_emails_from_csv(csv_file):
    """Return the second-column values of ``csv_file``, excluding the header.

    The first row is treated as the header (its second column is just
    "Email") and is skipped so it does not produce false positives in the
    log search. Rows with fewer than two columns, such as blank lines, are
    ignored instead of raising ``IndexError``.

    Args:
        csv_file: Path to the CSV file to read.

    Returns:
        A list of the second-column values in file order, followed by one
        hard-coded address. An empty file yields an empty list.
    """
    # The csv module wants a binary handle on Python 2 and a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(csv_file, 'rb')
    else:
        handle = open(csv_file, 'r', newline='')

    with handle as f:
        reader = csv.reader(f)
        # Consume the header row explicitly rather than slicing it off later.
        if next(reader, None) is None:
            return []
        emails = [row[1] for row in reader if len(row) > 1]

    # NOTE(review): this hard-coded address is always added to the search
    # list; it looks like a leftover test value -- confirm before removing.
    emails.append('buehler315@gmail.com')
    return emails
def get_email_sending_logfiles(filepath, filetype='syslog'):
    """Collect the paths of files under ``filepath`` whose name matches ``filetype``.

    Args:
        filepath: Directory to walk recursively.
        filetype: ``fnmatch`` pattern applied to each file name.

    Returns:
        A list of matching paths, each joined onto the directory it was
        found in, in the order ``os.walk`` visits them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(filepath)
        for name in fnmatch.filter(names, filetype)
    ]
def search_applogs_for_ids(campaign_id, company_id=None):
    """Print every ``app.log`` line that mentions the campaign or company ID.

    The directory to scan is read from the module-level ``args.logfile_path``
    and the files to read are found with ``get_email_sending_logfiles`` using
    the ``app.log`` file name. Each line is checked for ``campaign_id`` first;
    ``company_id`` is only checked on lines where the campaign ID is absent,
    so a line is reported at most once.

    Args:
        campaign_id: Campaign identifier, matched as a literal substring.
        company_id: Optional company identifier, matched as a literal
            substring. Skipped when falsy.
    """
    logfiles = get_email_sending_logfiles(args.logfile_path, filetype='app.log')
    banner = "***************************"
    for file_ in logfiles:
        # The context manager closes each log file once it has been scanned.
        with open(file_, "r") as f:
            for line in f:
                if campaign_id in line:
                    matched = campaign_id
                elif company_id and company_id in line:
                    matched = company_id
                else:
                    continue
                # Both kinds of match are reported in the same framed format.
                print(banner)
                print("match for: {}".format(matched))
                print(banner)
                print(line)
def _init_worker(parsed_args):
    """Publish the parsed CLI arguments as the ``args`` global in a pool worker."""
    global args
    args = parsed_args


def main():
    """Parse the command line and run the requested log search.

    When ``--campaign-id`` is given, the app logs are searched for the
    campaign ID (and for ``--company-id`` if supplied) and the process exits
    with status 0. Otherwise the addresses from the ``--csv`` file are
    searched for in the log files, one address per task across a process pool
    sized to the CPU count.

    The parsed arguments are stored in the module-level ``args`` because the
    search functions read ``args.logfile_path`` from there.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--csv', dest="csv_file", required=True, help='CSV containing email data')
    parser.add_argument('--logfiles', dest="logfile_path", required=True, help='Log data location')
    parser.add_argument('--campaign-id', dest="campaign_id", help='The ID of the missing campaign')
    parser.add_argument('--company-id', dest="company_id", help='The ID of the company with the missing campaign')
    global args
    args = parser.parse_args()

    if args.campaign_id:
        # company_id is None when the flag is omitted, which is also the
        # callee's default, so one call covers both cases.
        search_applogs_for_ids(args.campaign_id, args.company_id)
        sys.exit(0)

    emails = get_emails_from_csv(args.csv_file)
    print("Checking logfiles for email matches")
    # Workers read the ``args`` global. A worker that is not forked from this
    # process never ran main(), so hand the parsed arguments over explicitly.
    pool = multiprocessing.Pool(
        multiprocessing.cpu_count(),
        initializer=_init_worker,
        initargs=(args,),
    )
    pool.map(search_logfiles_for_email, emails)
    pool.close()
    pool.join()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment