Last active
December 30, 2015 01:49
-
-
Save fipar/7758814 to your computer and use it in GitHub Desktop.
Testing query scalability with concurrency
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Testing query scalability with concurrency using mysqlslap.
#
# Requires: employees dataset (https://dev.mysql.com/doc/employee/en/)
# Then create the comparison table (with an index on hire_date):
#   create table employees_idx_on_hire_date like employees;
#   alter table employees_idx_on_hire_date add key (hire_date);

# Pick ONE of these heredocs: they both write query.txt, so as written
# the second one overwrites the first — comment out the one you don't want.
cat <<EOF > query.txt
select * from employees.employees_idx_on_hire_date order by hire_date desc limit 10
EOF
cat <<EOF > query.txt
select * from employees.employees order by hire_date desc limit 10
EOF

# Tunables: number of concurrent clients and iterations per run.
c=10
i=1000

# Single mysqlslap run against the chosen query file:
mysqlslap --csv -c "$c" -i "$i" --create-schema=employees --no-drop -q query.txt

# CSV output columns will be:
#   benchmark, avg # of secs to run all queries, min # of secs, max # of secs,
#   # of clients, avg # of queries per client

# Rough sweep over concurrency levels, saving output in a form that can be
# consumed by R, or by MySQL and then RMySQL.
# This will be run on a 2-core box, so even 16 clients is a stretch.
i=100
for c in 1 2 4 8 12 16; do
    for query in \
        "select * from employees.employees_idx_on_hire_date order by hire_date desc limit 10" \
        "select * from employees.employees order by hire_date desc limit 10"; do
        # First field of each CSV row is the query text.
        # NOTE(review): no comma is emitted between the query text and
        # mysqlslap's CSV output — verify field alignment against the
        # LOAD DATA column list before importing.
        printf '%s' "$query"
        mysqlslap --csv -c "$c" -i "$i" --create-schema=employees --no-drop -q "$query"
    done
done | tee results.csv
# The rest is not valid shell script, just saved MySQL CLI output:
mysql [localhost] {msandbox} (r_input) > show create table mysqlslap_input_20131202_2009\G
*************************** 1. row ***************************
       Table: mysqlslap_input_20131202_2009
Create Table: CREATE TABLE `mysqlslap_input_20131202_2009` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `query` text,
  `benchmark` tinytext,
  `avg_num_secs` float DEFAULT NULL,
  `min_num_secs` float DEFAULT NULL,
  `max_num_secs` float DEFAULT NULL,
  `num_clients` int(11) DEFAULT NULL,
  `avg_query_per_client` float DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=49 DEFAULT CHARSET=latin1
mysql [localhost] {msandbox} (r_input) > load data infile '/Users/fernandoipar/percona/tmp/mysqlslap_input.csv' into table mysqlslap_input_20131202_2009 fields terminated by ',' enclosed by '"' (query,benchmark,avg_num_secs,min_num_secs,max_num_secs,num_clients,avg_query_per_client);
# Also not valid shell, saved from RStudio:
> con <- dbConnect(MySQL(), user="msandbox", password="msandbox", dbname="r_input", host="127.0.0.1", port=5527)
> benchmark_results <- dbReadTable(con, "mysqlslap_input_20131202_2009")
> p <- ggplot(benchmark_results, aes(num_clients, avg_num_secs))
> p + geom_point(aes(colour = factor(query)))
# Using the same benchmark_results data, this is more what I want:
> qplot(data=benchmark_results, x=num_clients, y=avg_num_secs, color=query_type)
# And the same thing but using facets:
> qplot(data=benchmark_results, x=num_clients, y=avg_num_secs, color=query_type, facets=~query_type)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment