Skip to content

Instantly share code, notes, and snippets.

@gingerwizard
Last active January 22, 2024 16:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gingerwizard/df56c1e361b516b043b47db2eba2031c to your computer and use it in GitHub Desktop.
Save gingerwizard/df56c1e361b516b043b47db2eba2031c to your computer and use it in GitHub Desktop.
CREATE EXTERNAL TABLE IF NOT EXISTS ookla (
  quadkey string,
  tile string,
  avg_d_kbps int,
  avg_u_kbps int,
  avg_lat_ms int,
  avg_lat_down_ms int,
  avg_lat_up_ms int,
  tests int,
  devices int)
STORED AS PARQUET
LOCATION 's3://datasets-documentation/ookla/parquet/performance/type=fixed/'

CREATE TABLE ookla_iceberg (
  quadkey string,
  tile string,
  avg_d_kbps int,
  avg_u_kbps int,
  avg_lat_ms int,
  avg_lat_down_ms int,
  avg_lat_up_ms int,
  tests int,
  devices int,
  year_month date)
  PARTITIONED BY (year(year_month))
  LOCATION 's3://datasets-documentation/ookla/iceberg/'
  TBLPROPERTIES ( 'table_type' ='ICEBERG')
  
  
  INSERT INTO ookla_iceberg SELECT *, DATE_PARSE(
CONCAT_WS('-', REGEXP_EXTRACT("$path" , 'year=(\S+)/quarter', 1), 
LPAD(CAST(CAST(REGEXP_EXTRACT("$path" , 'quarter=(\S+)/', 1) AS int) * 3 AS varchar), 2, '0')
), '%Y-%m') as year_month FROM ookla

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment