Skip to content

Instantly share code, notes, and snippets.

@morontt
Last active December 4, 2018 10:41
Show Gist options
  • Save morontt/ef7b9196f3461bcc373caaa4f1f563be to your computer and use it in GitHub Desktop.
Save morontt/ef7b9196f3461bcc373caaa4f1f563be to your computer and use it in GitHub Desktop.
log parser
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import pprint
pp = pprint.PrettyPrinter()

# CustomLog /path/to/request.log "%t %h \"%r\" %b %D"
log_file = 'requests.log'
# log_file = 'short.log'

# Positions of the fields we need once a log line is split on whitespace.
# With the CustomLog format above, %t yields two tokens ("[date" and "zone]"),
# %h one, and "%r" three (method, URL, protocol), so the URL is token 4 and
# %D (time taken to serve the request) is token 7.
URL_FIELD = 4
TIME_FIELD = 7

# Number of report rows printed.
TOP_N = 50

# Captures everything before the first '?', i.e. the URL without its query.
url_test = re.compile(r'^([^?]+)')

# (pattern, replacement) pairs that collapse URLs carrying ids into a single
# bucket.  The patterns are mutually exclusive, so the first match wins.
URL_RULES = [
    (re.compile(r'^/ifw/[a-f0-9]+/\d+/new'),
     '/ifw/xxxx/yyyy/new/'),
    (re.compile(r'^/public/doccontent/\d+'),
     '/public/doccontent/xxxx'),
    (re.compile(r'^/ajax/applicationSubmitted/\d+'),
     '/ajax/applicationSubmitted/xxxx'),
    (re.compile(r'^/ajax/getApplicant/\d+'),
     '/ajax/getApplicant/xxxx'),
    (re.compile(r'^/ajax/sign/\d+'),
     '/ajax/sign/xxxx'),
    (re.compile(r'^/ajax/ziplookup/\d+'),
     '/ajax/ziplookup/xxxx'),
    (re.compile(r'^/applications/edit/\d+'),
     '/applications/edit/xxxx'),
    (re.compile(r'^/applications/ajaxGetReportStatus/\d+'),
     '/applications/ajaxGetReportStatus/xxxx'),
    (re.compile(r'^/applications/mergedreport/\d+/0'),
     '/applications/mergedreport/xxxx/0'),
    (re.compile(r'^/applications/print/\d+'),
     '/applications/print/xxxx'),
    (re.compile(r'^/applications/view/\d+'),
     '/applications/view/xxxx'),
    (re.compile(r'^/ifw/[a-f0-9]+/\d+/sign/\d+/'),
     '/ifw/xxxx/yyyy/sign/zzzz/'),
    (re.compile(r'^/prospectapplications/approve/\d+'),
     '/prospectapplications/approve/xxxx'),
    (re.compile(r'^/prospectapplications/view/\d+'),
     '/prospectapplications/view/xxxx'),
    (re.compile(r'^/ifw/[a-f0-9]+/\d+/renderinvoicemodel'),
     '/ifw/xxxx/yyyy/renderinvoicemodel/zzzz'),
    (re.compile(r'^/reports/print/\d+'),
     '/reports/print/xxxx'),
]


def normalize_url(raw_url):
    """Return the bucket name for *raw_url*.

    The query string is dropped, then URLs matching one of ``URL_RULES`` are
    replaced by that rule's placeholder form.  A URL that starts with ``?``
    has no path part to extract and is returned unchanged.
    """
    match_obj = url_test.match(raw_url)
    if match_obj is None:
        return raw_url
    clean_url = match_obj.group(1)
    for pattern, replacement in URL_RULES:
        if pattern.match(clean_url):
            return replacement
    return clean_url


def aggregate(lines):
    """Sum the request time per normalized URL over an iterable of log lines.

    Returns a ``(totals, total_time, skipped)`` tuple: ``totals`` maps each
    normalized URL to its summed time, ``total_time`` is the sum over all
    counted lines, and ``skipped`` is the number of non-blank lines that had
    too few fields or a non-integer time field.  Blank lines are ignored.
    """
    totals = {}
    total_time = 0
    skipped = 0
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        try:
            raw_url = fields[URL_FIELD]
            raw_time = int(fields[TIME_FIELD])
        except (IndexError, ValueError):
            # e.g. a request logged as "-" shifts the field positions
            skipped += 1
            continue
        url = normalize_url(raw_url)
        totals[url] = totals.get(url, 0) + raw_time
        total_time += raw_time
    return totals, total_time, skipped


def build_report(totals, total_time):
    """Return one dict per URL (``url``, ``time``, ``percent``), slowest first.

    ``percent`` is the URL's share of *total_time*; it is 0.0 when
    *total_time* is zero so that all-zero timings do not divide by zero.
    """
    rows = []
    for url, spent in totals.items():
        percent = 100.0 * spent / total_time if total_time else 0.0
        rows.append({'url': url, 'time': spent, 'percent': percent})
    return sorted(rows, key=lambda row: row['time'], reverse=True)


def main():
    """Read ``log_file`` and print the ``TOP_N`` most time-consuming URLs."""
    import sys

    with open(log_file) as f:
        totals, total_time, skipped = aggregate(f)

    for row in build_report(totals, total_time)[:TOP_N]:
        # Single-argument print() behaves the same on Python 2 and 3.
        print('{}\t{:2.3f}\turl: {}'.format(
            row['time'], row['percent'], row['url']))

    if skipped:
        sys.stderr.write('skipped {} malformed line(s)\n'.format(skipped))


if __name__ == '__main__':
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import pprint
pp = pprint.PrettyPrinter()

# cat access.log | awk '{ print $7 }' > urls.log
# python rep.py | sort | uniq -ci | sort -nr | head -n 10

# Captures everything before the first '?', i.e. the URL without its query.
path_test = re.compile(r'^([^?]+)')


def strip_query(line):
    """Return the URL in *line* without its query string, or None.

    Surrounding whitespace (including the trailing newline) is removed first,
    so the same URL yields the same result whether or not it carried a query
    string.  None is returned for blank lines and for lines that start
    with ``?``, which have no path part.
    """
    match_obj = path_test.match(line.strip())
    if match_obj is None:
        return None
    return match_obj.group(1)


def dump_paths(path='urls_21.log'):
    """Pretty-print the query-less URL of every usable line in *path*."""
    with open(path) as f:
        for line in f:
            url = strip_query(line)
            if url is not None:
                pp.pprint(url)


if __name__ == '__main__':
    dump_paths()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment