Skip to content

Instantly share code, notes, and snippets.

@mneedham
Created October 28, 2022 12:26
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mneedham/6701e88f2aa51c0836ee4883492d397a to your computer and use it in GitHub Desktop.
Save mneedham/6701e88f2aa51c0836ee4883492d397a to your computer and use it in GitHub Desktop.
Querying ATP matches using DuckDB
-- Fails because of weird date
CREATE TABLE players AS
select *
from 'atp_players.csv';
-- all varchar
CREATE TABLE players1 AS
select *
from read_csv_auto('atp_players.csv', ALL_VARCHAR=TRUE);
DESCRIBE players1;
SELECT count(*) FROM players1;
-- sample all rows
CREATE TABLE players2 AS
select *
from read_csv_auto('atp_players.csv', SAMPLE_SIZE=-1);
DESCRIBE players2;
SELECT count(*) FROM players2;
-- manual schema
CREATE TABLE players3 AS
SELECT * FROM read_csv('atp_players.csv',
header=True,
columns={'player_id': 'INTEGER', 'name_first': 'VARCHAR', 'name_last': 'VARCHAR', 'hand': 'VARCHAR', 'dob': 'VARCHAR', 'ioc': 'VARCHAR', 'height': 'INTEGER', 'wikidata_id': 'VARCHAR'
});
DESCRIBE players3;
SELECT count(*) FROM players3;
-- ignore errors
CREATE TABLE players4 AS
select *
from read_csv_auto('atp_players.csv', IGNORE_ERRORS=TRUE);
DESCRIBE players4;
SELECT count(*) FROM players4;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment