openinx/ycsb-data.py

## ycsb-data.py
#!/usr/bin/python

import sys
import time
import datetime
import re
import pymysql
import pymysql.cursors

#   create table `ycsb`(
#       case_name varchar(1000), -- master-branch-async-replication-put1e8; HBASE-19064-async-replication-put1e8; HBASE-19064-sync-replication-put1e8
#   	timestamp bigint,
#   	qps_sec   bigint,
#   	max_latency_us bigint,
#   	min_latency_us bigint,
#   	avg_latency_us bigint,
#   	p90_latency_us bigint,
#   	p99_latency_us bigint,
#   	p999_latency_us bigint,
#   	p9999_latency_us bigint
#   );

def parse_timestamp(ts):
    """Convert a ``YYYY-mm-dd HH:MM:SS`` string to seconds since the epoch.

    The fields are handed to ``time.mktime``, so the text is interpreted as
    local time.

    :param ts: timestamp text such as ``2018-06-25 14:50:47``.
    :return: epoch seconds as a float.
    :raises ValueError: if ``ts`` does not match the expected format.
    """
    parsed = datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
    local_fields = parsed.timetuple()
    return time.mktime(local_fields)


def get_qps(data):
    """Extract the current throughput from a YCSB status line.

    :param data: one status line, e.g. ``... 41954.6 current ops/sec; ...``.
    :return: the integer part of the ops/sec figure, or -1 when the line
        carries no such figure.
    """
    # Raw string: '\d' and '\s' are invalid escapes in a normal literal.
    found = re.search(r'(\d+)(?:\.\d+)?\scurrent\sops/sec', data)
    return int(found.group(1)) if found else -1

def get_max(data):
    """Extract the ``Max=`` value from a YCSB status line.

    :param data: one status line.
    :return: the integer following ``Max=``, or -1 if it is absent.
    """
    # Raw string keeps '\d' from being read as an (invalid) string escape.
    found = re.search(r'Max=(\d+)', data)
    return int(found.group(1)) if found else -1

def get_min(data):
    """Extract the ``Min=`` value from a YCSB status line.

    :param data: one status line.
    :return: the integer following ``Min=``, or -1 if it is absent.
    """
    # Raw string keeps '\d' from being read as an (invalid) string escape.
    found = re.search(r'Min=(\d+)', data)
    return int(found.group(1)) if found else -1

def get_avg(data):
    """Extract the ``Avg=`` value from a YCSB status line.

    :param data: one status line.
    :return: the integer part of the number following ``Avg=`` (any
        fractional digits are dropped), or -1 if it is absent.
    """
    # Only the leading digits are captured, so "2858.03" yields 2858.
    found = re.search(r'Avg=(\d+)', data)
    return int(found.group(1)) if found else -1

def get_p90(data):
    """Extract the ``90=`` (90th percentile) value from a YCSB status line.

    :param data: one status line.
    :return: the integer following ``90=``, or -1 if it is absent.
    """
    # The lookbehind rejects a "90=" that is merely the tail of a longer
    # numeric key (for example "99.90=").
    found = re.search(r'(?<![\d.])90=(\d+)', data)
    return int(found.group(1)) if found else -1

def get_p99(data):
    """Extract the ``99=`` (99th percentile) value from a YCSB status line.

    :param data: one status line.
    :return: the integer following ``99=``, or -1 if it is absent.
    """
    # Without the lookbehind, "99=" would also match the end of "99.99=",
    # so a line lacking a p99 field would report the p99.99 value instead.
    found = re.search(r'(?<![\d.])99=(\d+)', data)
    return int(found.group(1)) if found else -1

def get_p999(data):
    """Extract the ``99.9=`` (99.9th percentile) value from a YCSB status line.

    :param data: one status line.
    :return: the integer following ``99.9=``, or -1 if it is absent.
    """
    # The dot is escaped so it matches only a literal '.', and the
    # lookbehind keeps the key from matching inside a longer number.
    found = re.search(r'(?<![\d.])99\.9=(\d+)', data)
    return int(found.group(1)) if found else -1

def get_p9999(data):
    """Extract the ``99.99=`` (99.99th percentile) value from a YCSB status line.

    :param data: one status line.
    :return: the integer following ``99.99=``, or -1 if it is absent.
    """
    # The dot is escaped so it matches only a literal '.', and the
    # lookbehind keeps the key from matching inside a longer number.
    found = re.search(r'(?<![\d.])99\.99=(\d+)', data)
    return int(found.group(1)) if found else -1

def get_timestamp(data):
    """Extract the leading date/time of a YCSB status line.

    A status line starts like ``2018-06-25 14:50:47:142 1860 sec: ...``; the
    part up to whole seconds is returned and the trailing ``:142`` field is
    left out.

    :param data: one status line.
    :return: text such as ``2018-06-25 14:50:47``, or ``''`` when the line
        does not contain that prefix.
    """
    # Raw string: '\d' and '\s' are invalid escapes in a normal literal.
    found = re.search(r'(\d+-\d+-\d+\s\d+:\d+:\d+):\d+\s\d+\ssec', data)
    return found.group(1) if found else ''

#2018-06-25 14:50:47:142 1860 sec: 78642852 operations; 41954.6 current ops/sec; est completion in 8 minutes [INSERT: Count=419549, Max=629247, Min=1671, Avg=2858.03, 90=2079, 99=5455, 99.9=271871, 99.99=626687]
def read_data(filename, testing):
    """Collect one row per usable YCSB status line found in ``filename``.

    Only lines containing ``est completion in `` are considered.  A line is
    dropped when its timestamp cannot be located or when any of qps, max,
    min, avg, p99, p99.9 or p99.99 is missing.  The p90 value is not part of
    that check, so a row may carry -1 in the p90 position.

    :param filename: path of the YCSB output file.
    :param testing: case name stored as the first element of every row.
    :return: list of tuples ``(testing, epoch_seconds, qps, max, min, avg,
        p90, p99, p99.9, p99.99)``.
    """
    rows = []
    with open(filename) as log_file:
        content = log_file.read()

    for raw_line in content.split('\n'):
        record = raw_line.strip()
        # Blank lines and non-status lines both fail this membership test.
        if 'est completion in ' not in record:
            continue

        stamp = get_timestamp(record)
        if not stamp:
            # No recognisable timestamp, so the line cannot be stored.
            continue
        epoch = parse_timestamp(stamp)

        qps = get_qps(record)
        peak = get_max(record)
        floor = get_min(record)
        mean = get_avg(record)
        p99 = get_p99(record)
        p999 = get_p999(record)
        p9999 = get_p9999(record)

        # -1 is the "not found" marker returned by every extractor.
        required = (qps, peak, floor, mean, p99, p999, p9999)
        if any(value < 0 for value in required):
            continue

        rows.append((testing, epoch, qps, peak, floor, mean,
                     get_p90(record), p99, p999, p9999))
    return rows

def connect_mysql(host='localhost', port=3306, user='root',
                  password='123456', database='ycsb'):
    """Open an autocommitting PyMySQL connection that yields dict rows.

    Called with no arguments it connects exactly as before (local server,
    ``root`` user, ``ycsb`` schema); the parameters allow another server or
    account to be used without editing this function.

    :param host: MySQL server host name.
    :param port: MySQL server TCP port.
    :param user: account name.
    :param password: account password.
    :param database: schema selected after connecting.
    :return: the object returned by ``pymysql.connect``.
    """
    # 'password'/'database' are the current keyword names; 'passwd'/'db'
    # are deprecated aliases in PyMySQL.
    return pymysql.connect(host=host,
                           port=port,
                           user=user,
                           password=password,
                           database=database,
                           autocommit=True,
                           cursorclass=pymysql.cursors.DictCursor)


def main():
    """Load the metrics of one YCSB log file into the ``ycsb`` table.

    Expects exactly two command-line arguments, ``<filename>`` and
    ``<testing-name>``; otherwise prints the usage text and exits with
    status 1.  Every row produced by ``read_data`` is inserted, and the
    connection is closed even if an insert fails.
    """
    if len(sys.argv) != 3:
        # Parenthesised form works as a statement on Python 2 and as a
        # function call on Python 3.
        print('Usage: ycsb-data.py <filename> <testing-name>')
        sys.exit(1)
    filename, testing = sys.argv[1], sys.argv[2]
    rows = read_data(filename, testing)

    # Placeholders are filled in by the driver, which escapes each value;
    # the case name comes straight from the command line and must not be
    # spliced into the SQL text.
    insert_sql = "insert into ycsb values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    conn = connect_mysql()
    try:
        with conn.cursor() as cursor:
            for row in rows:
                cursor.execute(insert_sql, row)
    finally:
        conn.close()

# Run the import only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
	#!/usr/bin/python

	import sys
	import time
	import datetime
	import re
	import pymysql
	import pymysql.cursors

	# create table `ycsb`(
	# case_name varchar(1000), -- master-branch-async-replication-put1e8; HBASE-19064-async-replication-put1e8; HBASE-19064-sync-replication-put1e8
	# timestamp bigint,
	# qps_sec bigint,
	# max_latency_us bigint,
	# min_latency_us bigint,
	# avg_latency_us bigint,
	# p90_latency_us bigint,
	# p99_latency_us bigint,
	# p999_latency_us bigint,
	# p9999_latency_us bigint
	# );

	def parse_timestamp(ts):
	    """Convert a ``YYYY-mm-dd HH:MM:SS`` string to seconds since the epoch.

	    The fields are handed to ``time.mktime``, so the text is interpreted
	    as local time.

	    :param ts: timestamp text such as ``2018-06-25 14:50:47``.
	    :return: epoch seconds as a float.
	    :raises ValueError: if ``ts`` does not match the expected format.
	    """
	    parsed = datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
	    return time.mktime(parsed.timetuple())


	def get_qps(data):
	    """Extract the current throughput from a YCSB status line.

	    :param data: one status line, e.g. ``... 41954.6 current ops/sec; ...``.
	    :return: the integer part of the ops/sec figure, or -1 when the line
	        carries no such figure.
	    """
	    # Raw string: '\d' and '\s' are invalid escapes in a normal literal.
	    found = re.search(r'(\d+)(?:\.\d+)?\scurrent\sops/sec', data)
	    if found is None:
	        return -1
	    return int(found.group(1))

	def get_max(data):
	    """Extract the ``Max=`` value from a YCSB status line.

	    :param data: one status line.
	    :return: the integer following ``Max=``, or -1 if it is absent.
	    """
	    # Raw string keeps '\d' from being read as an (invalid) string escape.
	    found = re.search(r'Max=(\d+)', data)
	    if found is None:
	        return -1
	    return int(found.group(1))

	def get_min(data):
	    """Extract the ``Min=`` value from a YCSB status line.

	    :param data: one status line.
	    :return: the integer following ``Min=``, or -1 if it is absent.
	    """
	    # Raw string keeps '\d' from being read as an (invalid) string escape.
	    found = re.search(r'Min=(\d+)', data)
	    if found is None:
	        return -1
	    return int(found.group(1))

	def get_avg(data):
	    """Extract the ``Avg=`` value from a YCSB status line.

	    :param data: one status line.
	    :return: the integer part of the number following ``Avg=`` (any
	        fractional digits are dropped), or -1 if it is absent.
	    """
	    # Only the leading digits are captured, so "2858.03" yields 2858.
	    found = re.search(r'Avg=(\d+)', data)
	    if found is None:
	        return -1
	    return int(found.group(1))

	def get_p90(data):
	    """Extract the ``90=`` (90th percentile) value from a YCSB status line.

	    :param data: one status line.
	    :return: the integer following ``90=``, or -1 if it is absent.
	    """
	    # The lookbehind rejects a "90=" that is merely the tail of a longer
	    # numeric key (for example "99.90=").
	    found = re.search(r'(?<![\d.])90=(\d+)', data)
	    if found is None:
	        return -1
	    return int(found.group(1))

	def get_p99(data):
	    """Extract the ``99=`` (99th percentile) value from a YCSB status line.

	    :param data: one status line.
	    :return: the integer following ``99=``, or -1 if it is absent.
	    """
	    # Without the lookbehind, "99=" would also match the end of "99.99=",
	    # so a line lacking a p99 field would report the p99.99 value instead.
	    found = re.search(r'(?<![\d.])99=(\d+)', data)
	    if found is None:
	        return -1
	    return int(found.group(1))

	def get_p999(data):
	    """Extract the ``99.9=`` (99.9th percentile) value from a YCSB status line.

	    :param data: one status line.
	    :return: the integer following ``99.9=``, or -1 if it is absent.
	    """
	    # The dot is escaped so it matches only a literal '.', and the
	    # lookbehind keeps the key from matching inside a longer number.
	    found = re.search(r'(?<![\d.])99\.9=(\d+)', data)
	    if found is None:
	        return -1
	    return int(found.group(1))

	def get_p9999(data):
	    """Extract the ``99.99=`` (99.99th percentile) value from a YCSB status line.

	    :param data: one status line.
	    :return: the integer following ``99.99=``, or -1 if it is absent.
	    """
	    # The dot is escaped so it matches only a literal '.', and the
	    # lookbehind keeps the key from matching inside a longer number.
	    found = re.search(r'(?<![\d.])99\.99=(\d+)', data)
	    if found is None:
	        return -1
	    return int(found.group(1))

	def get_timestamp(data):
	    """Extract the leading date/time of a YCSB status line.

	    A status line starts like ``2018-06-25 14:50:47:142 1860 sec: ...``;
	    the part up to whole seconds is returned and the trailing ``:142``
	    field is left out.

	    :param data: one status line.
	    :return: text such as ``2018-06-25 14:50:47``, or ``''`` when the
	        line does not contain that prefix.
	    """
	    # Raw string: '\d' and '\s' are invalid escapes in a normal literal.
	    found = re.search(r'(\d+-\d+-\d+\s\d+:\d+:\d+):\d+\s\d+\ssec', data)
	    if found is None:
	        return ''
	    return found.group(1)

	#2018-06-25 14:50:47:142 1860 sec: 78642852 operations; 41954.6 current ops/sec; est completion in 8 minutes [INSERT: Count=419549, Max=629247, Min=1671, Avg=2858.03, 90=2079, 99=5455, 99.9=271871, 99.99=626687]
	def read_data(filename, testing):
	    """Collect one row per usable YCSB status line found in ``filename``.

	    Only lines containing ``est completion in `` are considered.  A line
	    is dropped when its timestamp cannot be located or when any of qps,
	    max, min, avg, p99, p99.9 or p99.99 is missing.  The p90 value is not
	    part of that check, so a row may carry -1 in the p90 position.

	    :param filename: path of the YCSB output file.
	    :param testing: case name stored as the first element of every row.
	    :return: list of tuples ``(testing, epoch_seconds, qps, max, min,
	        avg, p90, p99, p99.9, p99.99)``.
	    """
	    data_set = []
	    with open(filename) as fd:
	        data = fd.read()

	    for line in data.split('\n'):
	        line = line.strip()
	        # Blank lines and non-status lines both fail this membership test.
	        if 'est completion in ' not in line:
	            continue

	        ts = get_timestamp(line)
	        if not ts:
	            # No recognisable timestamp, so the line cannot be stored.
	            continue
	        ts = parse_timestamp(ts)

	        qps = get_qps(line)
	        max_us = get_max(line)
	        min_us = get_min(line)
	        avg_us = get_avg(line)
	        p99 = get_p99(line)
	        p999 = get_p999(line)
	        p9999 = get_p9999(line)

	        # -1 is the "not found" marker returned by every extractor.
	        if min(qps, max_us, min_us, avg_us, p99, p999, p9999) < 0:
	            continue

	        data_set.append((testing, ts, qps, max_us, min_us, avg_us,
	                         get_p90(line), p99, p999, p9999))
	    return data_set

	def connect_mysql(host='localhost', port=3306, user='root',
	                  password='123456', database='ycsb'):
	    """Open an autocommitting PyMySQL connection that yields dict rows.

	    Called with no arguments it connects to the local server as ``root``
	    and selects the ``ycsb`` schema; the parameters allow another server
	    or account to be used without editing this function.

	    :param host: MySQL server host name.
	    :param port: MySQL server TCP port.
	    :param user: account name.
	    :param password: account password.
	    :param database: schema selected after connecting.
	    :return: the object returned by ``pymysql.connect``.
	    """
	    # 'password'/'database' are the current keyword names; 'passwd'/'db'
	    # are deprecated aliases in PyMySQL.
	    return pymysql.connect(host=host,
	                           port=port,
	                           user=user,
	                           password=password,
	                           database=database,
	                           autocommit=True,
	                           cursorclass=pymysql.cursors.DictCursor)


	def main():
	    """Load the metrics of one YCSB log file into the ``ycsb`` table.

	    Expects exactly two command-line arguments, ``<filename>`` and
	    ``<testing-name>``; otherwise prints the usage text and exits with
	    status 1.  Every row produced by ``read_data`` is inserted, and the
	    connection is closed even if an insert fails.
	    """
	    if len(sys.argv) != 3:
	        # Parenthesised form works as a statement on Python 2 and as a
	        # function call on Python 3.
	        print('Usage: ycsb-data.py <filename> <testing-name>')
	        sys.exit(1)
	    filename = sys.argv[1]
	    testing = sys.argv[2]
	    arrays = read_data(filename, testing)

	    # Placeholders are filled in by the driver, which escapes each value;
	    # the case name comes straight from the command line and must not be
	    # spliced into the SQL text.
	    sql = "insert into ycsb values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

	    conn = connect_mysql()
	    try:
	        with conn.cursor() as cursor:
	            for array in arrays:
	                cursor.execute(sql, array)
	    finally:
	        conn.close()

	# Run the import only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()