Skip to content

Instantly share code, notes, and snippets.

@graingerkid
Last active August 29, 2015 14:23
Show Gist options
  • Save graingerkid/6d64c5d95543466660f8 to your computer and use it in GitHub Desktop.
Save graingerkid/6d64c5d95543466660f8 to your computer and use it in GitHub Desktop.
Parses a server log file
def read_log(log):
    '''
    Lazily yields the lines of the file at path *log*, one at a time.

    Log files can be gigabytes in size, so the lines are streamed instead
    of being loaded into memory all at once. Each yielded line keeps its
    trailing newline, if it has one.
    '''
    with open(log) as log_file:
        # readline() returns '' only at end of file, which stops the iterator.
        for line in iter(log_file.readline, ''):
            yield line
def _parse_log_line(line):
    '''
    Splits one log line on single spaces and returns the tuple
    (server_ip, timestamp, method, request_uri, status_code, user_agent).

    Raises IndexError when the line has fewer than twelve fields.
    NOTE(review): the field positions look like the Apache/nginx
    "combined" access-log layout - confirm against the real log format.
    '''
    # Drop the line terminator first so it cannot leak into the last field.
    fields = line.rstrip('\r\n').split(' ')
    server_ip = fields[0]
    timestamp = fields[3].replace('[', '')
    method = fields[5].replace('"', '')
    request_uri = fields[6]
    status_code = fields[8]
    if len(fields) < 12:
        raise IndexError('log line has no user agent field')
    # A user agent may contain any number of spaces, so every remaining
    # field belongs to it; re-join them with the spaces that split removed.
    user_agent = ' '.join(fields[11:]).replace('"', '').strip()
    return server_ip, timestamp, method, request_uri, status_code, user_agent


def parse_log_file(generator):
    '''
    Returns parsed lines from the log file, splitting the data
    into the *server_ip, *timestamp, *method, *request_uri,
    *status_code and *user_agent.

    *generator* is any iterable of raw log lines. For every line exactly
    one item is yielded: a 6-tuple of strings when the line parses, or the
    exception object that was raised when it does not.
    '''
    for line in generator:
        try:
            record = _parse_log_line(line)
        except Exception as error:
            # Kept from the original contract: a line that cannot be parsed
            # yields the exception instead of aborting the whole run.
            record = error
        yield record
##
## Typical Usage: print every item the parser yields
##
for i in parse_log_file(read_log('access_log_svr11')):
    try:
        # print(x) with a single argument behaves the same on Python 2 and 3,
        # unlike the Python 2-only "print x" statement.
        print(i)
    except Exception as e:
        print(e)
##
## Or show only the records whose user agent mentions Google
##
for i in parse_log_file(read_log('access_log_svr11')):
    try:
        # The parser yields an exception object for a line it could not
        # parse; indexing such an item is expected to raise, and the
        # handler below then prints that error instead of a record.
        if 'google' in i[5].lower():
            print(i)
    except Exception as e:
        print(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment