Skip to content

Instantly share code, notes, and snippets.

@openinx
Created January 8, 2019 01:59
Show Gist options
  • Save openinx/c1f19aa3ee93c045317a3ae59bc4a148 to your computer and use it in GitHub Desktop.
Save openinx/c1f19aa3ee93c045317a3ae59bc4a148 to your computer and use it in GitHub Desktop.
Load the QPS and latency metrics from a YCSB log into MySQL.
#!/usr/bin/python
import sys
import time
import datetime
import re
import pymysql
import pymysql.cursors
# create table `ycsb`(
# case_name varchar(1000), -- master-branch-async-replication-put1e8; HBASE-19064-async-replication-put1e8; HBASE-19064-sync-replication-put1e8
# timestamp bigint,
# qps_sec bigint,
# max_latency_us bigint,
# min_latency_us bigint,
# avg_latency_us bigint,
# p90_latency_us bigint,
# p99_latency_us bigint,
# p999_latency_us bigint,
# p9999_latency_us bigint
# );
def parse_timestamp(ts):
    """Convert a "YYYY-MM-DD HH:MM:SS" string into seconds since the epoch.

    The string is parsed with ``datetime.strptime`` and the resulting time
    tuple is handed to ``time.mktime``, so the value is interpreted as local
    time and returned as a float.
    """
    parsed = datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
    return time.mktime(parsed.timetuple())
def get_qps(data):
    """Return the integer part of the "<n> current ops/sec" figure in *data*.

    Any fractional part is matched but discarded. Returns -1 when the text
    contains no such figure.
    """
    found = re.search(r'(\d+)(\.\d+)?\scurrent\sops/sec', data)
    return int(found.group(1)) if found else -1
def get_max(data):
    """Return the integer following "Max=" in *data*, or -1 if absent."""
    found = re.search(r'Max=(\d+)', data)
    if found is None:
        return -1
    return int(found.group(1))
def get_min(data):
    """Return the integer following "Min=" in *data*, or -1 if absent."""
    found = re.search(r'Min=(\d+)', data)
    if found is None:
        return -1
    return int(found.group(1))
def get_avg(data):
    """Return the integer part of the number following "Avg=" in *data*.

    A fractional part, if present, is matched but dropped (no rounding).
    Returns -1 when no "Avg=<digits>" token is found.
    """
    found = re.search(r'Avg=(\d+)(\.\d+)?', data)
    return int(found.group(1)) if found else -1
def get_p90(data):
    """Return the integer following the "90=" percentile field in *data*.

    The pattern is anchored with a negative lookbehind so that "90=" is only
    accepted when it is not the tail of a longer numeric token (for example
    "190=" or "99.90="). Returns -1 when no such field is found.
    """
    m = re.search(r'(?<![\d.])90=(\d+)', data)
    if m:
        return int(m.group(1))
    return -1
def get_p99(data):
    """Return the integer following the "99=" percentile field in *data*.

    A negative lookbehind rejects "99=" when it is preceded by a digit or a
    dot, so the tail of a "99.99=" field is never mistaken for the 99th
    percentile. Returns -1 when no standalone "99=" field is found.
    """
    m = re.search(r'(?<![\d.])99=(\d+)', data)
    if m:
        return int(m.group(1))
    return -1
def get_p999(data):
    """Return the integer following the "99.9=" percentile field in *data*.

    The dot is escaped so it only matches a literal ".", and a negative
    lookbehind rejects matches that are the tail of a longer numeric token.
    Returns -1 when no such field is found.
    """
    m = re.search(r'(?<![\d.])99\.9=(\d+)', data)
    if m:
        return int(m.group(1))
    return -1
def get_p9999(data):
    """Return the integer following the "99.99=" percentile field in *data*.

    The dot is escaped so it only matches a literal ".", and a negative
    lookbehind rejects matches that are the tail of a longer numeric token.
    Returns -1 when no such field is found.
    """
    m = re.search(r'(?<![\d.])99\.99=(\d+)', data)
    if m:
        return int(m.group(1))
    return -1
def get_timestamp(data):
    """Return the "YYYY-MM-DD HH:MM:SS" prefix of a status line in *data*.

    The match requires the date/time to be followed by ":<digits>", a
    whitespace character, "<digits>", another whitespace character and
    "sec"; only the date/time portion (without the trailing ":<digits>")
    is returned. Returns an empty string when nothing matches.
    """
    found = re.search(r'(\d+-\d+-\d+\s\d+:\d+:\d+):\d+\s\d+\ssec', data)
    return found.group(1) if found else ''
# Example of a YCSB status line that read_data() parses:
# 2018-06-25 14:50:47:142 1860 sec: 78642852 operations; 41954.6 current ops/sec; est completion in 8 minutes [INSERT: Count=419549, Max=629247, Min=1671, Avg=2858.03, 90=2079, 99=5455, 99.9=271871, 99.99=626687]
def read_data(filename, testing):
    """Parse a YCSB log file into a list of row tuples.

    Only lines containing 'est completion in ' are considered. A line is
    skipped when its timestamp cannot be extracted or when any of the
    metrics (qps, max, min, avg, p90, p99, p99.9, p99.99) fails to parse,
    i.e. its getter returns the -1 sentinel.

    Args:
        filename: path of the log file to read.
        testing: case name stored as the first element of every row.

    Returns:
        A list of tuples
        ``(testing, timestamp, qps, max, min, avg, p90, p99, p999, p9999)``
        in file order, where ``timestamp`` is the value returned by
        ``parse_timestamp``.
    """
    data_set = []
    with open(filename) as fd:
        # Stream the file line by line instead of loading it all at once.
        for raw_line in fd:
            line = raw_line.strip()
            if 'est completion in ' not in line:
                continue
            ts = get_timestamp(line)
            if not ts:
                # Skip this line because we failed to parse the timestamp.
                continue
            ts = parse_timestamp(ts)
            # Run each extractor exactly once and reuse the results for both
            # validation and the stored row.
            metrics = (
                get_qps(line),
                get_max(line),
                get_min(line),
                get_avg(line),
                get_p90(line),
                get_p99(line),
                get_p999(line),
                get_p9999(line),
            )
            # p90 is validated together with the other metrics so a -1
            # sentinel is never stored as if it were a real latency.
            if any(value < 0 for value in metrics):
                continue
            data_set.append((testing, ts) + metrics)
    return data_set
def connect_mysql():
    """Open and return a pymysql connection to the local ``ycsb`` database.

    The connection targets localhost:3306 as ``root``, has autocommit
    enabled and uses ``DictCursor`` as its cursor class.
    NOTE(review): host and credentials are hard-coded in this function.
    """
    settings = {
        'host': 'localhost',
        'port': 3306,
        'user': 'root',
        'passwd': '123456',
        'db': 'ycsb',
        'autocommit': True,
        'cursorclass': pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**settings)
def main():
    """Load the metrics of one YCSB log file into the ``ycsb`` MySQL table.

    Expects exactly two command-line arguments: the log file name and the
    case (testing) name. Prints a usage message and exits with status 1
    otherwise. The connection is always closed, even if an insert fails.
    """
    if len(sys.argv) != 3:
        # Single-argument call form behaves the same on Python 2 and 3.
        print('Usage: ycsb-data.py <filename> <testing-name>')
        sys.exit(1)
    filename = sys.argv[1]
    testing = sys.argv[2]
    rows = read_data(filename, testing)
    conn = connect_mysql()
    try:
        # Let the driver bind and escape the values instead of interpolating
        # them into the statement text; a case name containing a quote would
        # otherwise break (or inject into) the SQL.
        sql = "insert into ycsb values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        with conn.cursor() as cursor:
            cursor.executemany(sql, rows)
    finally:
        conn.close()


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment