Last active
July 11, 2018 10:00
-
-
Save ian29/6134562 to your computer and use it in GitHub Desktop.
downloads, cleans and imports geonames to a postgres database. essentially an automated version of this: https://github.com/colemanm/gazetteer/blob/master/docs/geonames_postgis_import.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
set -e -u | |
# Download and unzip data | |
wget http://download.geonames.org/export/dump/allCountries.zip | |
wget http://download.geonames.org/export/dump/alternateNames.zip | |
wget http://download.geonames.org/export/dump/countryInfo.txt | |
unzip allCountries.zip | |
unzip alternateNames.zip | |
## set up | |
sed -i "" "0,51d" countryInfo.txt # there's a really long annyoy header, get rid of it | |
createdb -U postgres gn | |
psql -U postgres -d gn -q -c "CREATE EXTENSION postgis" | |
## set up postgres tables | |
echo " | |
create table geoname ( | |
geonameid int, | |
name varchar(200), | |
asciiname varchar(200), | |
alternatenames varchar(8000), | |
latitude float, | |
longitude float, | |
fclass char(1), | |
fcode varchar(10), | |
country varchar(2), | |
cc2 varchar(60), | |
admin1 varchar(20), | |
admin2 varchar(80), | |
admin3 varchar(20), | |
admin4 varchar(20), | |
population bigint, | |
elevation int, | |
gtopo30 int, | |
timezone varchar(40), | |
moddate date | |
); | |
create table alternatename ( | |
alternatenameId int, | |
geonameid int, | |
isoLanguage varchar(7), | |
alternateName varchar(200), | |
isPreferredName boolean, | |
isShortName boolean, | |
isColloquial boolean, | |
isHistoric boolean | |
); | |
create table countryinfo ( | |
iso_alpha2 char(2), | |
iso_alpha3 char(3), | |
iso_numeric integer, | |
fips_code varchar(3), | |
name varchar(200), | |
capital varchar(200), | |
areainsqkm double precision, | |
population integer, | |
continent varchar(2), | |
tld varchar(10), | |
currencycode varchar(3), | |
currencyname varchar(20), | |
phone varchar(20), | |
postalcode varchar(100), | |
postalcoderegex varchar(200), | |
languages varchar(200), | |
geonameId int, | |
neighbors varchar(50), | |
equivfipscode varchar(3) | |
); | |
" | psql -U postgres -d gn | |
cat allCountries.txt | psql -U postgres -d gn -c "copy geoname from STDIN null as '';" | |
cat alternateNames.txt | psql -U postgres -d gn -c "copy alternatename from STDIN null as '';" | |
cat countryInfo.txt | psql -U postgres -d gn -c "copy countryinfo from STDIN null as '';" | |
# clean up! | |
rm allCountries.txt alternateNames.zip allCountries.zip alternateNames.txt countryInfo.txt iso-languagecodes.txt | |
# add all the geom columns and indexes and contrstaints and stuff | |
echo " | |
ALTER TABLE ONLY alternatename | |
ADD CONSTRAINT pk_alternatenameid PRIMARY KEY (alternatenameid); | |
ALTER TABLE ONLY geoname | |
ADD CONSTRAINT pk_geonameid PRIMARY KEY (geonameid); | |
ALTER TABLE ONLY countryinfo | |
ADD CONSTRAINT pk_iso_alpha2 PRIMARY KEY (iso_alpha2); | |
ALTER TABLE ONLY countryinfo | |
ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid); | |
ALTER TABLE ONLY alternatename | |
ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid); | |
SELECT AddGeometryColumn ('public','geoname','the_geom',4326,'POINT',2) | |
UPDATE geoname SET the_geom = ST_PointFromText('POINT(' || longitude || ' ' || latitude || ')', 4326) | |
CREATE INDEX idx_geoname_the_geom ON public.geoname USING gist(the_geom) | |
" | psql -U postgres -d gn |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment