Last active
August 29, 2015 14:23
-
-
Save mpdehnel/7a8f8e6ec283971b2df6 to your computer and use it in GitHub Desktop.
Pick out all of the IP Addresses listed in Logwatch emails from within a .mbox MailBox
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mailbox, re, argparse | |
import cPickle as pickle | |
def gen_summary(filename):
    """Count the IP addresses found in the Logwatch emails of an mbox file.

    Every message whose subject contains 'Logwatch for' is scanned with
    ip_finder(). The per-address totals are written in pickle format to
    "ip_store.p" (relative to the current working directory) and returned.

    Args:
        filename: path to the .mbox file to read.

    Returns:
        dict mapping each address, exactly as ip_finder() yields it, to the
        number of times it occurred across the matching messages.
    """
    ip_store = dict()
    my_box = mailbox.mbox(filename)
    for message in my_box:
        subject = message['subject']  # None when the header is missing.
        if not subject or 'Logwatch for' not in subject:
            continue
        # get_payload() on a multipart message returns a list of sub-messages
        # instead of text, which the regex cannot scan. Walking the leaf
        # parts handles both plain and multipart mail.
        for part in message.walk():
            if part.is_multipart():
                continue
            for address in ip_finder(part.get_payload()):
                ip_store[address] = ip_store.get(address, 0) + 1

    # Save the output in pickle format for later use; the with-block makes
    # sure the file is flushed and closed.
    with open("ip_store.p", "wb") as out_file:
        pickle.dump(ip_store, out_file)
    return ip_store
def ip_finder(message):
    """Return every dotted-quad IPv4 address found in the text *message*.

    The pattern has four capturing groups, so each match is reported as a
    4-tuple of octet strings, e.g. ('192', '168', '1', '1'). Matches are
    listed in order of appearance; the list is empty when nothing matches.
    """
    octets_pattern = re.compile(r'\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b')
    return octets_pattern.findall(message)
# Take filename as input from the command-line.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("filename", help="path to MBOX file to be scraped; of type .mbox")
    args = parser.parse_args()
    # Call form of print: identical output on Python 2 for a single argument,
    # and also valid syntax on Python 3.
    print(gen_summary(args.filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment