Skip to content

Instantly share code, notes, and snippets.

@rhzs
Last active August 29, 2015 14:12
Show Gist options
  • Save rhzs/f2a4ae24ceac9b9a8d74 to your computer and use it in GitHub Desktop.
Save rhzs/f2a4ae24ceac9b9a8d74 to your computer and use it in GitHub Desktop.
Heroku log report statistics (number of calls per URL, response times, and dyno)
#!/usr/bin/python
import time
import resource
import re
from collections import Counter
from threading import Thread
class ThreadLogFile(Thread):
    """Worker thread that scans one log file and gathers statistics per URL.

    For every configured URL the thread counts matching log lines, collects
    the response time (``connect`` + ``service``, in ms) of each match and
    tallies which dyno served it.  When the whole file has been read, one
    report dict per URL is passed to ``finishedCallback``.
    """

    def __init__(self, fileName, urlsToBeProceed, finishedCallback):
        """Initialize the thread.

        Args:
            fileName: path of the log file to read in ``run``.
            urlsToBeProceed: iterable of dicts with the keys ``'method'``,
                ``'url'`` and ``'dynamicParam'`` (placeholder name -> type).
            finishedCallback: callable invoked once per URL with the report
                dict built by ``makeReportDict``.
        """
        Thread.__init__(self)
        self.finishedCallback = finishedCallback
        self.fileName = fileName
        # Maps "<METHOD> <url template>" to the running statistics of that URL.
        self.urlsDict = {}
        for urlDict in urlsToBeProceed:
            pattern = self.buildRegexPattern(
                urlDict['method'], urlDict['url'], urlDict['dynamicParam'])
            self.urlsDict[urlDict['method'] + ' ' + urlDict['url']] = {
                'noOfCalls': 0,
                'responseList': [],
                'dynoDict': {},
                # Compiled once here so that scanning a line never has to
                # (re)compile a pattern.
                'matcher': re.compile(pattern),
            }
        # The capture group holds the value of each field.
        self.regexConnect = re.compile(r' connect=(\d+)ms ')
        self.regexService = re.compile(r' service=(\d+)ms ')
        # The dot is escaped so that only a literal "web.<n>" is accepted.
        self.regexDyno = re.compile(r' dyno=(web\.\d+) ')

    def buildRegexPattern(self, method, url, params):
        """Build the regex source string that recognises ``url`` in a log line.

        Every ``{name}`` placeholder whose type in ``params`` is ``'number'``
        is replaced (case-insensitively) by ``\\d+``; placeholders of any other
        type are left untouched.

        Args:
            method: HTTP method text, e.g. ``'GET'``.
            url: URL template, e.g. ``'/api/users/{user_id}'``.
            params: dict mapping placeholder name to its type.

        Returns:
            The pattern as a string: the method, a space, a ``key=`` prefix,
            the URL pattern and a trailing space.
        """
        urlPattern = url
        for name, kind in params.items():
            if kind == 'number':
                placeholder = re.compile(
                    r'\{' + re.escape(name) + r'\}', re.IGNORECASE)
                # A callable replacement is inserted verbatim; a '\d+' template
                # string is rejected as a bad escape by Python 3.7+.
                urlPattern = placeholder.sub(lambda _match: r'\d+', urlPattern)
        return method + r' (\w+=)' + urlPattern + ' '

    def find(self, keys, line, fn):
        """Call ``fn(key, line)`` for every key whose matcher is found in ``line``."""
        for key in keys:
            if self.urlsDict[key]['matcher'].search(line) is not None:
                fn(key, line)

    def onSuccess(self, key, line):
        """Record one matching log line in the statistics stored under ``key``.

        The call is always counted.  The response time is recorded only when
        both ``connect=`` and ``service=`` are present, and the dyno only when
        ``dyno=`` is present, so an incomplete line no longer raises.
        """
        stats = self.urlsDict[key]
        stats['noOfCalls'] += 1

        connect = self.regexConnect.search(line)
        service = self.regexService.search(line)
        if connect is not None and service is not None:
            stats['responseList'].append(
                int(connect.group(1)) + int(service.group(1)))

        dyno = self.regexDyno.search(line)
        if dyno is not None:
            dynoName = dyno.group(1)
            stats['dynoDict'][dynoName] = stats['dynoDict'].get(dynoName, 0) + 1

    def makeReportDict(self, key):
        """Summarise the statistics collected for ``key``.

        Sorts the stored response list in place.

        Returns:
            dict with ``'url'``, ``'num_of_calls'``, ``'res_mean'``,
            ``'res_median'``, ``'res_mode'`` and ``'dyno_name'``.  The three
            response values are 0 when no response time was recorded.
            ``'dyno_name'`` is 0 when no dyno was recorded, the dyno name when
            a single dyno has the highest count, or a list of names on a tie.
        """
        stats = self.urlsDict[key]
        responses = stats['responseList']
        responses.sort()
        count = len(responses)

        if responses:
            mean = sum(responses) / float(count)
            # Average of the two middle elements; they coincide for odd counts.
            median = 0.5 * (responses[(count - 1) // 2] + responses[count // 2])
            mode = Counter(responses).most_common(1)[0][0]
        else:
            mean = median = mode = 0

        dynoDict = stats['dynoDict']
        if dynoDict:
            # Computed once instead of once per dyno.
            highest = max(dynoDict.values())
            busiest = [name for name, hits in dynoDict.items() if hits == highest]
            dynoName = busiest[0] if len(busiest) == 1 else busiest
        else:
            dynoName = 0

        return {
            'url': key,
            'num_of_calls': stats['noOfCalls'],
            'res_mean': mean,
            'res_median': median,
            'res_mode': mode,
            'dyno_name': dynoName,
        }

    def run(self):
        """Scan the log file, then report the statistics of every URL."""
        keys = list(self.urlsDict)
        with open(self.fileName, "r") as inFile:
            for line in inFile:
                # No re.purge() here: the matchers are precompiled, and purging
                # would clear the regex cache shared by the whole process.
                self.find(keys, line, self.onSuccess)
        for key in keys:
            self.finishedCallback(self.makeReportDict(key))
def chunk(list, num):
    """Split a sequence into ``num`` contiguous slices of near-equal length.

    Slice boundaries are computed with integer arithmetic.  Accumulating a
    float step could drift below ``len(list)`` and yield an extra chunk
    (for example 11 items split into 10 produced 11 chunks).

    Args:
        list: the sequence to split (anything supporting ``len`` and slicing).
        num: number of chunks wanted; must be a positive integer.

    Returns:
        A list of ``num`` slices covering the input in order, or an empty
        list when the input is empty.  When ``num`` exceeds the input length
        some slices are empty.

    Raises:
        ValueError: if ``num`` is smaller than 1.
    """
    if num < 1:
        raise ValueError('num must be a positive integer')
    total = len(list)
    if total == 0:
        return []
    # Chunk i spans [i * total // num, (i + 1) * total // num).
    return [list[i * total // num:(i + 1) * total // num] for i in range(num)]
def main(fileName, urls, concurrency=2):
    """Analyse a log file with several worker threads and print a report.

    Rheza Satria, 2014-2015

    The URLs are split into ``concurrency`` groups; each group is handled by
    one ``ThreadLogFile`` that reads ``fileName``.  Once all threads have
    finished, a table with one row per URL is printed, followed by timing and
    memory figures.  Rows appear in the order the workers reported them.

    Args:
        fileName: path of the log file to analyse.
        urls: list of URL descriptions (dicts with ``'url'``, ``'method'``
            and ``'dynamicParam'``).  The list is not modified.
        concurrency: number of worker threads; must be between 1 and
            ``len(urls)``.  Defaults to 2.

    Raises:
        SystemExit: with status 1 when ``concurrency`` is out of range.
    """
    # Imported locally because the file's import header does not provide it.
    import sys

    startTime = time.time()
    result = []
    threads = []
    urlLen = len(urls)

    def onFinishedCallback(resultUrlsDict):
        """Collect one per-URL report produced by a worker thread."""
        result.append(resultUrlsDict)

    def prettyPrintResult():
        """Print the collected reports as a tab-aligned table."""
        urlColWidth = 7
        callsColWidth = 3
        responseColWidth = 3
        totalWidth = 180
        print('=' * totalWidth)
        print('\t' * (urlColWidth + (responseColWidth + responseColWidth - 1))
              + 'URL Response Time (in ms)')
        print(' URL'
              + '\t' * urlColWidth + '# num. of calls'
              + '\t' * callsColWidth + 'Mean'
              + '\t' * responseColWidth + 'Median'
              + '\t' * responseColWidth + 'Mode'
              + '\t' * responseColWidth + 'Most Responded Dyno')
        print('=' * totalWidth)
        for i, res in enumerate(result):
            # Short URLs get extra tabs so the following columns line up.
            callsOfWidthAdjustedSize = 0
            if len(res['url']) < 30:
                callsOfWidthAdjustedSize = 2
            elif len(res['url']) < 40:
                callsOfWidthAdjustedSize = 1
            print(res['url']
                  + '\t' * (callsColWidth + callsOfWidthAdjustedSize) + str(res['num_of_calls'])
                  + '\t' * responseColWidth + ('%.2f' % res['res_mean'])
                  + '\t' * responseColWidth + str(res['res_median'])
                  + '\t' * responseColWidth + str(res['res_mode'])
                  + '\t' * responseColWidth + str(res['dyno_name']))
            if i != len(result) - 1:
                print('-' * totalWidth)
        print('=' * totalWidth)

    def executeThread(fileName, urlDict):
        """Start one worker for the given group of URLs."""
        thread = ThreadLogFile(fileName, urlDict, onFinishedCallback)
        thread.start()
        threads.append(thread)

    def finishedAllThreads():
        """Block until every started worker has finished."""
        for thread in threads:
            thread.join()

    if concurrency < 1:
        print('ERROR: Number of concurrency should be at least 1.')
        sys.exit(1)
    if concurrency > urlLen:
        print('ERROR: Number of concurrency should be less than number of processed urls.')
        sys.exit(1)

    # chunk() yields single-URL groups when concurrency equals the number of
    # URLs, so no separate branch is needed for that case.
    for urlGroup in chunk(urls, concurrency):
        if urlGroup:
            executeThread(fileName, urlGroup)

    finishedAllThreads()
    prettyPrintResult()

    # ru_maxrss is reported in kilobytes on Linux but in bytes on macOS;
    # normalise to bytes so the printed unit is right.
    maxRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    if sys.platform != 'darwin':
        maxRss *= 1024

    print('Total running time: {seconds} seconds'.format(seconds=time.time() - startTime))
    print('Memory usage: {bytes} bytes'.format(bytes=maxRss))
    print('We used {concurrency} thread(s) to process {numOfUrls} url(s), '
          'edit the source code to change number of running threads.'.format(
              concurrency=concurrency, numOfUrls=urlLen))
    print('')
    print('Created by Rheza Satria - 27 December 2014')
if __name__ == "__main__":
    # Endpoints to analyse, as (HTTP method, URL template) pairs.  Every
    # template carries a single numeric ``{user_id}`` placeholder.
    endpoints = [
        ('GET', '/api/users/{user_id}/count_pending_messages'),
        ('GET', '/api/users/{user_id}/get_messages'),
        ('GET', '/api/users/{user_id}/get_friends_progress'),
        ('GET', '/api/users/{user_id}/get_friends_score'),
        ('POST', '/api/users/{user_id}'),
        ('GET', '/api/users/{user_id}'),
    ]
    # Expand each pair into the dict layout main() expects; every entry gets
    # its own placeholder dict.
    urls = [
        {
            'url': path,
            'method': verb,
            'dynamicParam': {'user_id': 'number'},
        }
        for verb, path in endpoints
    ]
    main('sample.log', urls)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment