"""Heroku log report statistics.

For each configured URL this script reports the number of calls, the response
time (mean, median and mode) and the dyno that responded most often.

Originally published as GitHub gist rhzs/f2a4ae24ceac9b9a8d74.
"""
import time
import resource
import re
from collections import Counter
from threading import Thread
class ThreadLogFile(Thread):
    """Worker thread that gathers per-URL statistics from one log file.

    Every line of the file is tested against the URL patterns handed to the
    constructor.  For each matching line the thread counts the call, records
    the response time (``connect`` + ``service`` milliseconds) and counts the
    ``dyno`` that served it.  When the whole file has been read,
    ``finishedCallback`` is invoked once per URL with that URL's report dict
    (see ``makeReportDict``).

    NOTE(review): parts of this class were missing from the source it was
    restored from (truncated expressions, the tail of ``run``); the restored
    pieces follow the visible fragments -- confirm against the original.
    """

    def __init__(self, fileName, urlsToBeProceed, finishedCallback):
        """Initialize the thread.

        fileName         -- path of the log file to scan.
        urlsToBeProceed  -- iterable of dicts with the keys 'method', 'url'
                            and 'dynamicParam' (placeholder name -> type).
        finishedCallback -- callable receiving one report dict per URL.
        """
        # Thread.start() raises RuntimeError unless the base initialiser ran.
        Thread.__init__(self)
        self.finishedCallback = finishedCallback
        self.fileName = fileName
        self.urlsDict = {}
        for urlDict in urlsToBeProceed:
            pattern = self.buildRegexPattern(
                urlDict['method'], urlDict['url'], urlDict['dynamicParam'])
            self.urlsDict[urlDict['method'] + ' ' + urlDict['url']] = {
                'noOfCalls': 0,
                'responseList': [],
                'dynoDict': {},
                # Compiled once here; it is applied to every line of the log.
                'matcher': re.compile(pattern),
            }
        self.regexConnect = re.compile(r' connect=\d+ms ')
        self.regexService = re.compile(r' service=\d+ms ')
        self.regexDyno = re.compile(r' dyno=web.\d+ ')

    def buildRegexPattern(self, method, url, params):
        """Build the regex source that recognises ``method`` + ``url``.

        Each ``{name}`` placeholder in ``url`` whose entry in ``params`` has
        the type 'number' is replaced (case-insensitively) by ``\\d+``.
        Placeholders of any other type are left untouched.  Returns the
        pattern as a string.
        """
        urlPattern = url
        # items() instead of iteritems() so this runs on Python 2 and 3.
        for matched, paramType in params.items():
            if paramType == 'number':
                # A callable replacement is inserted verbatim; the string
                # '\d+' is rejected as a bad escape by re.sub on Python 3.7+.
                urlPattern = re.sub(re.escape('{' + matched + '}'),
                                    lambda _match: r'\d+',
                                    urlPattern, flags=re.IGNORECASE)
        return method + r' (\w+=)' + urlPattern + ' '

    def find(self, keys, line, fn):
        """Call ``fn(key, line)`` for every key whose pattern occurs in ``line``."""
        for key in keys:
            if self.urlsDict[key]['matcher'].search(line) is not None:
                fn(key, line)

    @staticmethod
    def _extractField(fieldRegex, valueRegex, line):
        """Return the first ``valueRegex`` hit inside the ``fieldRegex`` match.

        Returns None when ``line`` does not contain the field at all.
        """
        field = fieldRegex.search(line)
        if field is None:
            return None
        return re.findall(valueRegex, field.group())[0]

    def onSuccess(self, key, line):
        """Record one matching log ``line`` in the statistics of ``key``."""
        stats = self.urlsDict[key]
        stats['noOfCalls'] += 1

        # Response time is connect + service; it is skipped when either
        # field is absent so an unusual line cannot abort the whole run.
        connect = self._extractField(self.regexConnect, r'\d+', line)
        service = self._extractField(self.regexService, r'\d+', line)
        if connect is not None and service is not None:
            stats['responseList'].append(int(connect) + int(service))

        dyno = self._extractField(self.regexDyno, r'web.\d+', line)
        if dyno is not None:
            stats['dynoDict'][dyno] = stats['dynoDict'].get(dyno, 0) + 1

    def makeReportDict(self, key):
        """Summarise the statistics collected for ``key``.

        Returns a dict with the keys 'url', 'num_of_calls', 'res_mean',
        'res_median', 'res_mode' and 'dyno_name'.  The three response figures
        are 0 when no response time was recorded.  'dyno_name' is 0 when no
        dyno was seen, the dyno name when a single dyno has the highest
        count, and a list of names when several dynos tie.
        """
        stats = self.urlsDict[key]
        # The median below is only meaningful on sorted data.
        sortedResponseList = sorted(stats['responseList'])
        responseLen = len(sortedResponseList)

        if responseLen:
            mean = sum(sortedResponseList) / float(responseLen)
            # Average of the two middle elements; for an odd length both
            # indexes point at the same element.
            median = 0.5 * (sortedResponseList[(responseLen - 1) // 2]
                            + sortedResponseList[responseLen // 2])
            mode = Counter(sortedResponseList).most_common(1)[0][0]
        else:
            mean = median = mode = 0

        dynoDict = stats['dynoDict']
        if dynoDict:
            highest = max(dynoDict.values())
            busiest = [name for name, count in dynoDict.items()
                       if count == highest]
            dynoName = busiest[0] if len(busiest) == 1 else busiest
        else:
            dynoName = 0

        return {
            'url': key,
            'num_of_calls': stats['noOfCalls'],
            'res_mean': mean,
            'res_median': median,
            'res_mode': mode,
            'dyno_name': dynoName,
        }

    def run(self):
        """Run the thread: scan the file, then report every URL."""
        keys = list(self.urlsDict)
        with open(self.fileName, "r") as inFile:
            for line in inFile:
                self.find(keys, line, self.onSuccess)
        for key in keys:
            urlStat = self.makeReportDict(key)
            self.finishedCallback(urlStat)
def chunk(list, num):
    """Split ``list`` into ``num`` consecutive parts of near-equal length.

    list -- the sequence to split (the name shadows the builtin; it is kept
            so existing keyword callers keep working).
    num  -- number of parts; truncated to an int and must be at least 1.

    Returns a list of slices whose concatenation equals ``list``.  An empty
    input yields ``[]``.  When ``num`` exceeds ``len(list)`` some of the
    parts are empty.

    Raises ValueError when ``num`` is smaller than 1.
    """
    parts = int(num)
    if parts < 1:
        raise ValueError('num must be a positive integer')
    total = len(list)
    if not total:
        return []
    # Integer boundaries floor(i * total / parts).  Accumulating a float step
    # instead can fall just short of ``total`` and emit an extra chunk.
    return [list[i * total // parts:(i + 1) * total // parts]
            for i in range(parts)]
def main(fileName, urls):
    """Scan ``fileName`` for ``urls`` on worker threads and print a report.

    fileName -- path of the log file handed to every worker thread.
    urls     -- list of URL descriptions (dicts with the keys 'method',
                'url' and 'dynamicParam'); the list is not modified.

    The URLs are divided between ``concurrency`` ThreadLogFile workers.  When
    ``concurrency`` exceeds the number of URLs an error is printed and no
    worker is started.  The running time and memory figures are printed in
    either case.  Returns None.

    Original author credit: Rheza Satria, 2014-2015.

    NOTE(review): the bodies of the nested helpers and the branch structure
    were missing from the source this was restored from; they were rebuilt
    from the visible fragments -- confirm against the original.
    """
    startTime = time.time()
    # change this for number of concurrency
    concurrency = 2
    result = []
    threads = []
    urlLen = len(urls)

    def onFinishedCallback(resultUrlsDict):
        # Called from the worker threads, once per URL report dict.
        result.append(resultUrlsDict)

    def prettyPrintResult():
        urlColWidth = 7
        callsColWidth = 3
        responseColWidth = 3
        totalWidth = 180
        print('='*totalWidth)
        print('\t'*(urlColWidth+(responseColWidth+responseColWidth-1)) + 'URL Response Time (in ms)')
        print(' URL'
              + '\t'*urlColWidth + '# num. of calls'
              + '\t'*callsColWidth + 'Mean'
              + '\t'*responseColWidth + 'Median'
              + '\t'*responseColWidth + 'Mode'
              + '\t'*responseColWidth + 'Most Responded Dyno')
        print('='*totalWidth)
        for i, res in enumerate(result):
            # Short URLs get extra tabs so the columns stay aligned.
            callsOfWidthAdjustedSize = 0
            if len(res['url']) < 30:
                callsOfWidthAdjustedSize = 2
            elif len(res['url']) < 40:
                callsOfWidthAdjustedSize = 1
            print(res['url']
                  + '\t'*(callsColWidth+callsOfWidthAdjustedSize) + str(res['num_of_calls'])
                  + '\t'*responseColWidth + '%.2f' % res['res_mean']
                  + '\t'*responseColWidth + str(res['res_median'])
                  + '\t'*responseColWidth + str(res['res_mode'])
                  + '\t'*responseColWidth + str(res['dyno_name']))
            if i != len(result) - 1:
                print('-'*totalWidth)
        print('='*totalWidth)

    def executeThread(fileName, urlDict):
        thread = ThreadLogFile(fileName, urlDict, onFinishedCallback)
        threads.append(thread)
        thread.start()

    def finishedAllThreads():
        for thread in threads:
            thread.join()

    if concurrency > urlLen:
        print('ERROR: Number of concurrency should be less than number of processed urls.')
    else:
        if concurrency == urlLen:
            # One worker per URL.
            batches = [[url] for url in urls]
        else:
            batches = chunk(urls, concurrency)
        for batch in batches:
            executeThread(fileName, batch)
        # Print only after every worker has delivered its reports.
        finishedAllThreads()
        prettyPrintResult()

    # The caller's ``urls`` list is deliberately left intact: clearing it
    # here emptied the caller's data and cannot lower ru_maxrss, which is a
    # peak value.
    print('Total running time: {seconds} seconds'.format(seconds=time.time()-startTime))
    # NOTE(review): the unit of ru_maxrss is platform dependent (kilobytes on
    # Linux, bytes on macOS), so "bytes" may not match -- confirm.
    print('Memory usage: {bytes} bytes'.format(bytes=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
    print('We used {concurrency} thread(s) to process {numOfUrls} url(s), '
          'edit the source code to change number of running threads.'.format(concurrency=concurrency, numOfUrls=urlLen))
    print('')
    print('Created by Rheza Satria - 27 December 2014')
if __name__ == "__main__":
    # URL templates to report on.  Each '{user_id}' placeholder is declared
    # as a 'number' in 'dynamicParam'.
    urls = [
        {
            'url': '/api/users/{user_id}/count_pending_messages',
            'method': 'GET',
            'dynamicParam': {'user_id': 'number'},
        },
        {
            'url': '/api/users/{user_id}/get_messages',
            'method': 'GET',
            'dynamicParam': {'user_id': 'number'},
        },
        {
            'url': '/api/users/{user_id}/get_friends_progress',
            'method': 'GET',
            'dynamicParam': {'user_id': 'number'},
        },
        {
            'url': '/api/users/{user_id}/get_friends_score',
            'method': 'GET',
            'dynamicParam': {'user_id': 'number'},
        },
        {
            'url': '/api/users/{user_id}',
            'method': 'POST',
            'dynamicParam': {'user_id': 'number'},
        },
        {
            'url': '/api/users/{user_id}',
            'method': 'GET',
            'dynamicParam': {'user_id': 'number'},
        },
    ]
    main('sample.log', urls)
# (GitHub page footer -- a sign-up / sign-in prompt -- not part of the script.)