Skip to content

Instantly share code, notes, and snippets.

@zhangce
Created March 18, 2022 06:45
Show Gist options
  • Save zhangce/5c6ef29ed29c9e0490adeccb46be9c8b to your computer and use it in GitHub Desktop.
Save zhangce/5c6ef29ed29c9e0490adeccb46be9c8b to your computer and use it in GitHub Desktop.
----------------------------
---- Interactive SQL II ----
----------------------------
set lc_messages to 'en_US.UTF-8';
-- Initial tables for small set
create table players (
id serial,
first_name varchar(255) not null,
last_name varchar(255) not null,
elo int not null,
-- table constraints
unique(first_name, last_name),
primary key(id)
);
create table events (
id serial,
name varchar(255),
site varchar(255),
event_date Date,
-- table constraints
unique (name, site, event_date),
primary key(id)
);
create table games (
id serial,
round varchar(255) not null,
result char not null,
moves varchar(8192) not null,
black_player integer not null,
white_player integer not null,
eid integer not null,
-- table constraints
primary key(id)
);
-- upload small data set
copy players from 'your-path-to/tiny_chess/players_db.csv' delimiters ',' csv header;
copy events from 'your-path-to/tiny_chess/events_db.csv' delimiters ',' csv header;
copy games from '/your-path-to/tiny_chess/games_db.csv' delimiters ',' csv header;
-- NOTE in class we discussed letting postgres handle serial IDs for you
-- and never explicitly assigning them. This is handled internally by
-- a sequence, so if we upload data from a CSV the sequence needs to
-- be updated. Run the below commands if you intend to insert into the
-- relations.
select setval('games_id_seq', (select max(id) from games));
select setval('players_id_seq', (select max(id) from players));
select setval('events_id_seq', (select max(id) from events));
-- 0. Find the names of any player with an Elo rating of 2500 or higher.
-- I want 1 column called NAME
-- => 3 rows
select first_name || ' ' || last_name as Name
from players
where elo >= 2500;
-- 1. Find the names of any player who has ever played a game as white.
-- => 5 rows
select distinct first_name, last_name
from players p join games g
on p.id = g.white_player;
-- 2. Find the names of any player who has ever won a game as white.
-- => 2 rows
select distinct first_name, last_name
from players p join games g
on p.id = g.white_player
where g.result = 'W';
-- 3. insert a row into games with non-existent players
insert into games (round, result, moves, black_player, white_player, eid)
values (
'', 'D', '',
(select max(id) + 1 from players),
(select max(id) + 2 from players),
(select max(id) + 1 from events)
);
-------------------------------
-------------------------------
-------------------------------
-- drop the incorrect tables and recreate them right
--
-- At home exercise, could you modify the tables to make them
-- correct? How would this affect data already stored in the table?
create table players (
id serial,
first_name varchar(255) not null,
last_name varchar(255) not null,
elo int not null,
-- table constraints
unique(first_name, last_name),
primary key(id)
);
create table events (
id serial,
name varchar(255),
site varchar(255),
event_date Date,
-- table constraints
unique (name, site, event_date),
primary key(id)
);
create table games (
id serial,
round varchar(255) not null,
result char not null,
moves varchar(8192) not null,
black_player integer not null,
white_player integer not null,
eid integer not null,
-- table constraints
foreign key(black_player) references Players(id),
foreign key(white_player) references Players(id),
foreign key(eid) references events(id),
primary key(id)
);
copy players from 'your-path-to/chess/players_db.csv' delimiters ',' csv header;
copy events from 'your-path-to/chess/events_db.csv' delimiters ',' csv header;
copy games from '/your-path-to/chess/games_db.csv' delimiters ',' csv header;
-- Needed again after an upload
select setval('games_id_seq', (select max(id) from games));
select setval('players_id_seq', (select max(id) from players));
select setval('events_id_seq', (select max(id) from events));
-------------------------
-------------------------
-------------------------
-- 4. Find the names of any player who played any games in 2018.
-- => 5460
-- In-class query
select count(*) from
(
select distinct first_name, last_name
from players p cross join events e cross join games g
where (p.id = g.white_player or p.id = g.black_player)
and e.id = g.eid
and extract(year from e.event_date) = 2018
) as T;
-- 5. Find the names and dates of any event in which
-- Magnus Carlsen lost a game.
-- => 56
select distinct events.name, events.event_date
from events cross join games cross join players
where games.eid = events.id
and ((players.id = white_player and result = 'B')
or (players.id = black_player and result = 'W'))
and first_name = 'Magnus' and last_name = 'Carlsen';
-- In-class query
with MCID as (
select id
from players
where first_name = 'Magnus'
and last_name = 'Carlsen'
),
GMC as (
select eid
from games cross join MCID
where (white_player = MCID.id and result = 'B')
or (black_player = MCID.id and result = 'W')
)
select distinct name, event_date from events join GMC on id = eid;
-- 6. Find the players with the top 15 Elo ratings
-- sort ties by alphabetical order on LAST_NAME
select first_name, last_name, elo
from players
order by elo desc, last_name asc
limit 15;
-- 7. Find all the players who *share* a last name
-- => 2232
-- In-class query
select last_name, count(*) as c
from players
group by last_name
having count(*) > 1
order by c desc;
-- NOTE projected columns must be part of the 'group by'
-- or picked using an aggregate function. In class it was attempted
-- to use a "select * ..." which doesn't work for this reason.
-- You could for example choose a first_name from the group,
-- using an aggregate:
select last_name, max(first_name), count(*) as c
from players
group by last_name
having count(*) > 1
order by c desc;
-- or by choosing a positional argument as follows:
select last_name, (ARRAY_AGG(first_name))[1], count(*) as c
from players
group by last_name
having count(*) > 1
order by c desc;
-- TODO at home: try running the above for an index that is
-- out of range for the group size, e.g. 3 for groups with only
-- 2 players. What happend?
-- 8. Find the names of all opponents of Magnus Carlsen.
-- An opponent is someone who he has played a game against.
-- => 159
-- In-class query
with MCID as (
select id
from players
where first_name = 'Magnus'
and last_name = 'Carlsen'
limit 1
)
select distinct first_name, last_name
from players cross join games cross join MCID
where ((players.id = games.white_player
and games.black_player = MCID.id)
or (players.id = games.black_player
and games.white_player = MCID.id));
select distinct p1.first_name, p1.last_name
from players p1 cross join players p2 cross join games g
where p2.last_name='Carlsen' and p2.first_name = 'Magnus'
and p1.last_name != 'Carlsen' and p1.first_name != 'Magnus'
and ((p1.id=g.white_player and p2.id=g.black_player)
or (p2.id=g.white_player and p1.id=g.black_player));
-- 9. Find the names of all players who have never lost a game
-- (a draw is not considered a loss).
-- => 2613
-- NOTE we decided that a player is only considered IFF they have
-- played at least one game.
-- In-class query
(select distinct p.first_name, p.last_name
from players p cross join games g
where (p.id = g.white_player or p.id = g.black_player))
except
(select distinct p.first_name, p.last_name
from players p cross join games g
where ((p.id = g.white_player and result = 'B')
or (p.id = g.black_player and result = 'W')));
-- Without the above constraint
select id, first_name, last_name from players
where id not in
(select white_player from games where result = 'B'
union -- using (... not in ... union) instead of except
select black_player from games where result = 'W');
-- 10. Of all people who share a name, find the person
-- with the lowest Elo rating. Sorted from highest
-- number of shared to lowest.
-- => 2291
with low_elo as
(
select last_name, min(elo) as elo, count(*) as c
from players group by last_name
having count(*) > 1
)
select elo, last_name, first_name
from players join low_elo using(last_name, elo)
order by c desc, last_name asc;
-- XXX. Find the names of all players who have *transitively*
-- lost to Magnus Carlsen.
--
-- NOTE Recommended to run on tiny_chess database and not the
-- full chess database.
--
-- At Home Exercise, rewrite this query, trying to make it faster.
with recursive losers as (
(
with MCID as (
select id
from players
where first_name = 'Magnus'
and last_name = 'Carlsen'
-- where first_name = 'some tiny chess name'
-- and last_name = some tiny chess name'
limit 1
)
select distinct players.id, players.first_name, players.last_name
from players cross join games cross join MCID
where ((players.id = games.white_player
and games.black_player = MCID.id
and games.result = 'B')
or (players.id = games.black_player
and games.white_player = MCID.id
and games.result = 'W'))
)
union
(
select distinct players.id, players.first_name, players.last_name
from players cross join games cross join losers
where ((players.id = games.white_player
and games.black_player = losers.id
and games.result = 'B')
or (players.id = games.black_player
and games.white_player = losers.id
and games.result = 'W'))
)
) select * from losers;
-- XXX Find the top 10 most frequent opening moves for the
-- white player. Sorted by popularity descending.
--
select substring(moves, 1, 5) as move, count(*) as cnt
from games
group by substring(moves, 1, 5)
order by cnt desc
limit 10;
--------------------------------------------
-- Further exercises for at home practice --
--------------------------------------------
-- XXX What players have won the most with the least
-- popular opening move.
-- XXX Find the top 10 most frequent opening moves for the
-- black player. Sorted by popularity descending.
--
-- XXX Find the top 10 players with the best win / lose ratio
--- End of session ---
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment