@kozross
Last active August 15, 2016 05:00
-- I have the following table schemata in SQLite
CREATE TABLE experiments (
id integer not null primary key autoincrement,
seed integer not null,
gens integer not null check (gens > 0),
keep_every integer not null check (keep_every > 0),
cluster_size integer not null check (cluster_size > 0),
search_width integer not null check (search_width > 0),
table_size integer not null check (table_size > 2)
);
CREATE TABLE data (
id integer not null primary key autoincrement,
rank integer not null check (rank > 0),
experiment_id integer not null check (experiment_id > 0),
coverage integer not null check (coverage > 0),
convergence_time integer not null check (convergence_time > -1),
foreign key (experiment_id) references experiments (id)
);
-- This kind of query (of which I will be making *many*) is running very slowly
select e.gens, e.cluster_size, e.search_width, avg(d.coverage)
from experiments as e
join data as d on e.id = d.experiment_id
group by d.experiment_id
order by avg(d.coverage) desc
limit 10;
-- Can I use indexes to make this faster, and if so, what kind of index would you recommend starting with?
-- Query plan for the above is here:
/* 0|0|1|SCAN TABLE data AS d
0|1|0|SEARCH TABLE experiments AS e USING INTEGER PRIMARY KEY (rowid=?)
0|0|0|USE TEMP B-TREE FOR GROUP BY
0|0|0|USE TEMP B-TREE FOR ORDER BY */
-- So, I tried this:
create index cov_ix on data (experiment_id, coverage);
-- The query plan now reads as follows:
/* 0|0|1|SCAN TABLE data AS d USING COVERING INDEX cov_ix
0|1|0|SEARCH TABLE experiments AS e USING INTEGER PRIMARY KEY (rowid=?)
0|0|0|USE TEMP B-TREE FOR ORDER BY */
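-- The effect of cov_ix can be reproduced end-to-end in a throwaway script. Below is a
-- minimal sketch using Python's sqlite3 module with an in-memory database: it recreates
-- the two tables and the index exactly as above, then asks for the query plan. (The
-- script itself is an illustration, not part of the original gist.) Note that the
-- remaining TEMP B-TREE FOR ORDER BY is expected: the ordering key is an aggregate,
-- avg(d.coverage), which no index can pre-sort.

```python
import sqlite3

# In-memory reproduction of the schema from the gist.
conn = sqlite3.connect(":memory:")
conn.executescript("""
CREATE TABLE experiments (
    id integer not null primary key autoincrement,
    seed integer not null,
    gens integer not null check (gens > 0),
    keep_every integer not null check (keep_every > 0),
    cluster_size integer not null check (cluster_size > 0),
    search_width integer not null check (search_width > 0),
    table_size integer not null check (table_size > 2)
);
CREATE TABLE data (
    id integer not null primary key autoincrement,
    rank integer not null check (rank > 0),
    experiment_id integer not null check (experiment_id > 0),
    coverage integer not null check (coverage > 0),
    convergence_time integer not null check (convergence_time > -1),
    foreign key (experiment_id) references experiments (id)
);
CREATE INDEX cov_ix ON data (experiment_id, coverage);
""")

# Ask SQLite how it would execute the slow query now that cov_ix exists.
plan = conn.execute("""
EXPLAIN QUERY PLAN
select e.gens, e.cluster_size, e.search_width, avg(d.coverage)
from experiments as e
join data as d on e.id = d.experiment_id
group by d.experiment_id
order by avg(d.coverage) desc
limit 10
""").fetchall()

for row in plan:
    print(row[3])
```

-- Because cov_ix contains every column of data the query touches (experiment_id and
-- coverage), SQLite can scan the index alone, and the index's leading column matches
-- the GROUP BY, so the temp b-tree for grouping disappears.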