Created
March 18, 2022 06:45
-
-
Save zhangce/5c6ef29ed29c9e0490adeccb46be9c8b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---------------------------- | |
---- Interactive SQL II ---- | |
---------------------------- | |
| |
set lc_messages to 'en_US.UTF-8'; | |
| |
| |
-- Initial tables for small set | |
| |
create table players ( | |
id serial, | |
first_name varchar(255) not null, | |
last_name varchar(255) not null, | |
elo int not null, | |
-- table constraints | |
unique(first_name, last_name), | |
primary key(id) | |
); | |
| |
create table events ( | |
id serial, | |
name varchar(255), | |
site varchar(255), | |
event_date Date, | |
-- table constraints | |
unique (name, site, event_date), | |
primary key(id) | |
); | |
| |
create table games ( | |
id serial, | |
round varchar(255) not null, | |
result char not null, | |
moves varchar(8192) not null, | |
black_player integer not null, | |
white_player integer not null, | |
eid integer not null, | |
-- table constraints | |
primary key(id) | |
); | |
| |
-- upload small data set | |
| |
copy players from 'your-path-to/tiny_chess/players_db.csv' delimiters ',' csv header; | |
copy events from 'your-path-to/tiny_chess/events_db.csv' delimiters ',' csv header; | |
copy games from '/your-path-to/tiny_chess/games_db.csv' delimiters ',' csv header; | |
| |
-- NOTE in class we discussed letting postgres handle serial IDs for you | |
-- and never explicitly assigning them. This is handled internally by | |
-- a sequence, so if we upload data from a CSV the sequence needs to | |
-- be updated. Run the below commands if you intend to insert into the | |
-- relations. | |
select setval('games_id_seq', (select max(id) from games)); | |
select setval('players_id_seq', (select max(id) from players)); | |
select setval('events_id_seq', (select max(id) from events)); | |
| |
-- 0. Find the names of any player with an Elo rating of 2500 or higher. | |
-- I want 1 column called NAME | |
-- => 3 rows | |
| |
select first_name || ' ' || last_name as Name | |
from players | |
where elo >= 2500; | |
| |
-- 1. Find the names of any player who has ever played a game as white. | |
-- => 5 rows | |
| |
select distinct first_name, last_name | |
from players p join games g | |
on p.id = g.white_player; | |
| |
-- 2. Find the names of any player who has ever won a game as white. | |
-- => 2 rows | |
| |
select distinct first_name, last_name | |
from players p join games g | |
on p.id = g.white_player | |
where g.result = 'W'; | |
| |
-- 3. insert a row into games with non-existent players | |
| |
insert into games (round, result, moves, black_player, white_player, eid) | |
values ( | |
'', 'D', '', | |
(select max(id) + 1 from players), | |
(select max(id) + 2 from players), | |
(select max(id) + 1 from events) | |
); | |
| |
------------------------------- | |
------------------------------- | |
------------------------------- | |
| |
-- drop the incorrect tables and recreate them right | |
-- | |
-- At home exercise, could you modify the tables to make them | |
-- correct? How would this affect data already stored in the table? | |
| |
create table players ( | |
id serial, | |
first_name varchar(255) not null, | |
last_name varchar(255) not null, | |
elo int not null, | |
-- table constraints | |
unique(first_name, last_name), | |
primary key(id) | |
); | |
| |
create table events ( | |
id serial, | |
name varchar(255), | |
site varchar(255), | |
event_date Date, | |
-- table constraints | |
unique (name, site, event_date), | |
primary key(id) | |
); | |
| |
create table games ( | |
id serial, | |
round varchar(255) not null, | |
result char not null, | |
moves varchar(8192) not null, | |
black_player integer not null, | |
white_player integer not null, | |
eid integer not null, | |
-- table constraints | |
foreign key(black_player) references Players(id), | |
foreign key(white_player) references Players(id), | |
foreign key(eid) references events(id), | |
primary key(id) | |
); | |
| |
copy players from 'your-path-to/chess/players_db.csv' delimiters ',' csv header; | |
copy events from 'your-path-to/chess/events_db.csv' delimiters ',' csv header; | |
copy games from '/your-path-to/chess/games_db.csv' delimiters ',' csv header; | |
| |
-- Needed again after an upload | |
select setval('games_id_seq', (select max(id) from games)); | |
select setval('players_id_seq', (select max(id) from players)); | |
select setval('events_id_seq', (select max(id) from events)); | |
| |
------------------------- | |
------------------------- | |
------------------------- | |
| |
-- 4. Find the names of any player who played any games in 2018. | |
-- => 5460 | |
| |
-- In-class query | |
select count(*) from | |
( | |
select distinct first_name, last_name | |
from players p cross join events e cross join games g | |
where (p.id = g.white_player or p.id = g.black_player) | |
and e.id = g.eid | |
and extract(year from e.event_date) = 2018 | |
) as T; | |
| |
-- 5. Find the names and dates of any event in which | |
-- Magnus Carlsen lost a game. | |
-- => 56 | |
| |
select distinct events.name, events.event_date | |
from events cross join games cross join players | |
where games.eid = events.id | |
and ((players.id = white_player and result = 'B') | |
or (players.id = black_player and result = 'W')) | |
and first_name = 'Magnus' and last_name = 'Carlsen'; | |
| |
-- In-class query | |
with MCID as ( | |
select id | |
from players | |
where first_name = 'Magnus' | |
and last_name = 'Carlsen' | |
), | |
GMC as ( | |
select eid | |
from games cross join MCID | |
where (white_player = MCID.id and result = 'B') | |
or (black_player = MCID.id and result = 'W') | |
) | |
select distinct name, event_date from events join GMC on id = eid; | |
| |
-- 6. Find the players with the top 15 Elo ratings | |
-- sort ties by alphabetical order on LAST_NAME | |
| |
select first_name, last_name, elo | |
from players | |
order by elo desc, last_name asc | |
limit 15; | |
| |
-- 7. Find all the players who *share* a last name | |
-- => 2232 | |
| |
-- In-class query | |
select last_name, count(*) as c | |
from players | |
group by last_name | |
having count(*) > 1 | |
order by c desc; | |
| |
-- NOTE projected columns must be part of the 'group by' | |
-- or picked using an aggregate function. In class it was attempted | |
-- to use a "select * ..." which doesn't work for this reason. | |
-- You could for example choose a first_name from the group, | |
-- using an aggregate: | |
| |
select last_name, max(first_name), count(*) as c | |
from players | |
group by last_name | |
having count(*) > 1 | |
order by c desc; | |
| |
-- or by choosing a positional argument as follows: | |
| |
select last_name, (ARRAY_AGG(first_name))[1], count(*) as c | |
from players | |
group by last_name | |
having count(*) > 1 | |
order by c desc; | |
| |
-- TODO at home: try running the above for an index that is | |
-- out of range for the group size, e.g. 3 for groups with only | |
-- 2 players. What happend? | |
| |
-- 8. Find the names of all opponents of Magnus Carlsen. | |
-- An opponent is someone who he has played a game against. | |
-- => 159 | |
| |
-- In-class query | |
with MCID as ( | |
select id | |
from players | |
where first_name = 'Magnus' | |
and last_name = 'Carlsen' | |
limit 1 | |
) | |
select distinct first_name, last_name | |
from players cross join games cross join MCID | |
where ((players.id = games.white_player | |
and games.black_player = MCID.id) | |
or (players.id = games.black_player | |
and games.white_player = MCID.id)); | |
| |
select distinct p1.first_name, p1.last_name | |
from players p1 cross join players p2 cross join games g | |
where p2.last_name='Carlsen' and p2.first_name = 'Magnus' | |
and p1.last_name != 'Carlsen' and p1.first_name != 'Magnus' | |
and ((p1.id=g.white_player and p2.id=g.black_player) | |
or (p2.id=g.white_player and p1.id=g.black_player)); | |
| |
-- 9. Find the names of all players who have never lost a game | |
-- (a draw is not considered a loss). | |
-- => 2613 | |
| |
-- NOTE we decided that a player is only considered IFF they have | |
-- played at least one game. | |
-- In-class query | |
(select distinct p.first_name, p.last_name | |
from players p cross join games g | |
where (p.id = g.white_player or p.id = g.black_player)) | |
except | |
(select distinct p.first_name, p.last_name | |
from players p cross join games g | |
where ((p.id = g.white_player and result = 'B') | |
or (p.id = g.black_player and result = 'W'))); | |
| |
-- Without the above constraint | |
select id, first_name, last_name from players | |
where id not in | |
(select white_player from games where result = 'B' | |
union -- using (... not in ... union) instead of except | |
select black_player from games where result = 'W'); | |
| |
-- 10. Of all people who share a name, find the person | |
-- with the lowest Elo rating. Sorted from highest | |
-- number of shared to lowest. | |
-- => 2291 | |
| |
with low_elo as | |
( | |
select last_name, min(elo) as elo, count(*) as c | |
from players group by last_name | |
having count(*) > 1 | |
) | |
select elo, last_name, first_name | |
from players join low_elo using(last_name, elo) | |
order by c desc, last_name asc; | |
| |
| |
-- XXX. Find the names of all players who have *transitively* | |
-- lost to Magnus Carlsen. | |
-- | |
-- NOTE Recommended to run on tiny_chess database and not the | |
-- full chess database. | |
-- | |
-- At Home Exercise, rewrite this query, trying to make it faster. | |
| |
| |
with recursive losers as ( | |
( | |
with MCID as ( | |
select id | |
from players | |
where first_name = 'Magnus' | |
and last_name = 'Carlsen' | |
-- where first_name = 'some tiny chess name' | |
-- and last_name = some tiny chess name' | |
limit 1 | |
) | |
select distinct players.id, players.first_name, players.last_name | |
from players cross join games cross join MCID | |
where ((players.id = games.white_player | |
and games.black_player = MCID.id | |
and games.result = 'B') | |
or (players.id = games.black_player | |
and games.white_player = MCID.id | |
and games.result = 'W')) | |
) | |
union | |
( | |
select distinct players.id, players.first_name, players.last_name | |
from players cross join games cross join losers | |
where ((players.id = games.white_player | |
and games.black_player = losers.id | |
and games.result = 'B') | |
or (players.id = games.black_player | |
and games.white_player = losers.id | |
and games.result = 'W')) | |
) | |
) select * from losers; | |
| |
| |
-- XXX Find the top 10 most frequent opening moves for the | |
-- white player. Sorted by popularity descending. | |
-- | |
| |
select substring(moves, 1, 5) as move, count(*) as cnt | |
from games | |
group by substring(moves, 1, 5) | |
order by cnt desc | |
limit 10; | |
| |
-------------------------------------------- | |
-- Further exercises for at home practice -- | |
-------------------------------------------- | |
| |
-- XXX What players have won the most with the least | |
-- popular opening move. | |
| |
| |
| |
-- XXX Find the top 10 most frequent opening moves for the | |
-- black player. Sorted by popularity descending. | |
-- | |
| |
| |
| |
-- XXX Find the top 10 players with the best win / lose ratio | |
| |
| |
| |
--- End of session --- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment