Last active
December 12, 2024 06:19
-
-
Save marcoslot/bb5ff797ab717c0e630bd7416f9c9449 to your computer and use it in GitHub Desktop.
Load AIS data into Iceberg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Load data from https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2024/index.html into Iceberg | |
-- | |
-- To prepare: create extension crunchy_spatial_analytics cascade; | |
-- Clean up previous creation | |
-- drop table if exists ais, loaded_ais_files; | |
-- Create the AIS Iceberg table | |
create table ais ( | |
mmsi bigint not null, | |
basedatetime timestamptz not null, | |
lat double precision not null, | |
lon double precision not null, | |
sog double precision not null, | |
cog double precision not null, | |
heading double precision not null, | |
vesselname text not null, | |
imo text not null, | |
callsign text not null, | |
vesseltype int, | |
status int, | |
length double precision, | |
width double precision, | |
draft double precision, | |
cargo int, | |
transceiverclass text, | |
-- for convenience, auto-generate a point geometry | |
position geometry generated always as (st_makepoint(lon, lat)) stored | |
) | |
using iceberg; | |
-- Track which files were already loaded | |
create table loaded_ais_files (file_date date primary key); | |
-- Function to load a single AIS data file | |
create or replace function load_ais_file(p_file_date date) | |
returns void language plpgsql as $function$ | |
begin | |
-- Remember that we loaded the file | |
insert into loaded_ais_files values (p_file_date) on conflict do nothing; | |
-- or, return if we already loaded it | |
if not found then | |
raise notice 'file for % was already loaded', p_file_date; | |
return; | |
end if; | |
-- create a temp table from the zipped CSV using GDAL, all columns will be text | |
execute format($$ | |
create temp table ais_import () | |
with (load_from = 'https://coast.noaa.gov/htdata/CMSP/AISDataHandler/%1$s/AIS_%2$s.zip', | |
zip_path = 'AIS_%2$s.csv') | |
$$, | |
to_char(p_file_date, 'YYYY'), | |
to_char(p_file_date, 'YYYY_MM_DD')); | |
-- insert into the main AIS table | |
insert into | |
ais (mmsi, basedatetime, lat, lon, sog, cog, heading, vesselname, imo, callsign, vesseltype, status, length, width, draft, cargo, transceiverclass) | |
select | |
mmsi::bigint, | |
basedatetime::timestamptz, | |
lat::double precision, | |
lon::double precision, | |
sog::double precision, | |
cog::double precision, | |
heading::double precision, | |
vesselname, | |
imo, | |
callsign, | |
nullif(vesseltype, '')::int, | |
nullif(status, '')::int, | |
nullif(length, '')::double precision, | |
nullif(width, '')::double precision, | |
nullif(draft, '')::double precision, | |
nullif(cargo, '')::int, | |
nullif(transceiverclass, '') | |
from | |
ais_import; | |
-- cleanup | |
drop table ais_import; | |
raise notice 'loaded %', p_file_date; | |
end; | |
$function$; | |
-- Load all AIS data files in a time range, commit after each file | |
create or replace procedure load_ais(start_date date, end_date date) | |
language plpgsql as $function$ | |
declare file_date date; | |
begin | |
for file_date in select d::date from generate_series(start_date, end_date, interval '1 day') d loop | |
perform load_ais_file(file_date); | |
commit; | |
end loop; | |
end | |
$function$; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Load all available data from 2024 that has not been loaded yet | |
call load_ais('2024-01-01', '2024-09-30'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment