Last active
August 29, 2015 14:23
-
-
Save graingerkid/6d64c5d95543466660f8 to your computer and use it in GitHub Desktop.
Parses a server log file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_log(log):
    """Lazily yield the lines of the log file at path ``log``.

    Log files are typically gigabytes in size, so the lines are handed
    out one at a time instead of loading the whole file into memory.
    Every yielded line is exactly what was read, including its line
    terminator.
    """
    with open(log) as log_file:
        # The file stays open only while the generator is being consumed.
        for line in log_file:
            yield line
def parse_log_file(generator):
    """Parse raw access-log lines into tuples of fields.

    Each line is split on single spaces and the fields are picked out by
    position (NOTE(review): the positions look like the Apache/NCSA
    "combined" log format -- confirm against the real log files):

        0       server_ip
        3       timestamp    (leading '[' removed; the following
                              timezone token is not included)
        5       method       (double quotes removed)
        6       request_uri
        8       status_code  (kept as a string)
        11...   user_agent   (every remaining token, re-joined with
                              spaces, double quotes removed)

    Args:
        generator: iterable of log lines, e.g. the result of ``read_log``.

    Yields:
        A ``(server_ip, timestamp, method, request_uri, status_code,
        user_agent)`` tuple of strings for every line that parses.  For a
        line that cannot be parsed (for example one with too few fields)
        the exception instance itself is yielded instead, so callers must
        be prepared for non-tuple items.
    """
    for raw_line in generator:
        try:
            # Drop the line terminator first so it cannot leak into the
            # last field (the user agent).
            fields = raw_line.rstrip('\r\n').split(' ')
            server_ip = fields[0]
            timestamp = fields[3].replace('[', '')
            method = fields[5].replace('"', '')
            request_uri = fields[6]
            status_code = fields[8]
            # A user agent may contain any number of spaces, so take every
            # token from position 11 onwards and restore the spaces.
            # Indexing fields[11] explicitly keeps the IndexError for lines
            # that carry no user agent at all.
            user_agent = ' '.join([fields[11]] + fields[12:]).replace('"', '')
            parsed = (server_ip, timestamp, method, request_uri,
                      status_code, user_agent)
        except Exception as e:
            # Deliberate best effort: one malformed line must not abort the
            # whole stream, so the error is handed to the caller as an item.
            parsed = e
        # Yield outside the try block so that exceptions thrown into the
        # generator by the consumer are not swallowed by the handler above.
        yield parsed
##
## Typical usage: print every parsed entry of the log file.
##
for i in parse_log_file(read_log('access_log_svr11')):
    try:
        # print(x) with a single argument produces the same output on
        # Python 2 and Python 3, unlike the Python 2-only print statement.
        print(i)
    except Exception as e:
        print(e)

##
## Or print only the entries whose user agent mentions Google
## (e.g. Googlebot).
##
for i in parse_log_file(read_log('access_log_svr11')):
    try:
        # i[5] is the user agent of a parsed entry.  Lines that failed to
        # parse arrive as exception objects; the lookup then raises and the
        # error is printed by the handler below instead.
        if 'google' in i[5].lower():
            print(i)
    except Exception as e:
        print(e)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment