Last active
April 29, 2019 07:13
-
-
Save spolakh/36e9cf5d392aff2e9751 to your computer and use it in GitHub Desktop.
This script imports geonames into postgres for the purposes of knn-spgist vs knn-gist benchmarking. Based on the script from geonames forums: http://forum.geonames.org/gforum/posts/list/15/926.page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#=============================================================================== | |
# | |
# FILE: getgeo.sh | |
# | |
# USAGE: ./getgeo.sh | |
# | |
# DESCRIPTION: run the script so that the geodata will be downloaded and inserted into your | |
# database | |
# | |
# OPTIONS: --- | |
# REQUIREMENTS: --- | |
# BUGS: --- | |
# NOTES: --- | |
# AUTHOR: Andreas (aka Harpagophyt ) | |
# COMPANY: <a href="http://forum.geonames.org/gforum/posts/list/926.page" target="_blank" rel="nofollow">http://forum.geonames.org/gforum/posts/list/926.page</a> | |
# VERSION: 1.4 | |
# CREATED: 07/06/2008 | |
# REVISION: 1.1 2008-06-07 replace COPY continentCodes through INSERT statements. | |
# 1.2 2008-11-25 Adjusted by Bastiaan Wakkie in order to not unnessisarily | |
# download. | |
# 1.3 2011-08-07 Updated script with tree changes. Removes 2 obsolete records from "countryinfo" dump image, | |
# updated timeZones table with raw_offset and updated postalcode to varchar(20). | |
# 1.4 2012-03-31 Don Drake - Add FKs after data is loaded, also vacuum analyze tables to ensure FK lookups use PK | |
# - Don't unzip text files | |
# - added DROP TABLE | |
# 1.5 2012-06-30 Furdui Marian - added CountryCode to TimeZones and updated geonames.alternatenames to varchar(8000) | |
#=============================================================================== | |
/bin/date | |
WORKPATH="${HOME}/tmp/GIS/gisnames/geodata" | |
TMPPATH="tmp" | |
PCPATH="pc" | |
PREFIX="_" | |
DBHOST="127.0.0.1" | |
DBPORT="5432" | |
DBUSER="postgres" | |
FILES="allCountries.zip" | |
psql -U $DBUSER -h $DBHOST -p $DBPORT -c "CREATE DATABASE geonames WITH TEMPLATE = template0 ENCODING = 'UTF8';" | |
psql -U $DBUSER -h $DBHOST -p $DBPORT geonames <<EOT | |
DROP TABLE geoname CASCADE; | |
CREATE TABLE geoname ( | |
geonameid int, | |
name varchar(200), | |
asciiname varchar(200), | |
alternatenames varchar, | |
latitude float, | |
longitude float, | |
fclass char(1), | |
fcode varchar(10), | |
country varchar(2), | |
cc2 varchar(60), | |
admin1 varchar(20), | |
admin2 varchar(80), | |
admin3 varchar(20), | |
admin4 varchar(20), | |
population bigint, | |
elevation int, | |
gtopo30 int, | |
timezone varchar(40), | |
moddate date | |
); | |
EOT | |
# check if needed directories do already exsist | |
if [ -d "$WORKPATH" ]; then | |
echo "$WORKPATH exists..." | |
sleep 0 | |
else | |
echo "$WORKPATH and subdirectories will be created..." | |
mkdir -p $WORKPATH | |
mkdir -p $WORKPATH/$TMPPATH | |
mkdir -p $WORKPATH/$PCPATH | |
echo "created $WORKPATH" | |
fi | |
echo | |
echo ",---- STARTING (downloading, unpacking and preparing)" | |
cd $WORKPATH/$TMPPATH | |
for i in $FILES | |
do | |
wget -N -q "http://download.geonames.org/export/dump/$i" # get newer files | |
if [ $i -nt $PREFIX$i ] || [ ! -e $PREFIX$i ] ; then | |
cp -p $i $PREFIX$i | |
if [ `expr index zip $i` -eq 1 ]; then | |
unzip -o -u -q $i | |
fi | |
echo "| $i has been downloaded"; | |
else | |
echo "| $i is already the latest version" | |
fi | |
done | |
# download the postalcodes. You must know yourself the url | |
cd $WORKPATH/$PCPATH | |
wget -q -N "http://download.geonames.org/export/zip/allCountries.zip" | |
if [ $WORKPATH/$PCPATH/allCountries.zip -nt $WORKPATH/$PCPATH/allCountries$PREFIX.zip ] || [ ! -e $WORKPATH/$PCPATH/allCountries.zip ]; then | |
echo "Attempt to unzip $WORKPATH/$PCPATH/allCountries.zip file..." | |
unzip -o -u -q $WORKPATH/$PCPATH/allCountries.zip | |
cp -p $WORKPATH/$PCPATH/allCountries.zip $WORKPATH/$PCPATH/allCountries$PREFIX.zip | |
echo "| ....zip has been downloaded" | |
else | |
echo "| ....zip is already the latest version" | |
fi | |
echo "+---- FILL DATABASE ( this takes 2 days on my machine )" | |
psql -e -U $DBUSER -h $DBHOST -p $DBPORT geonames <<EOT | |
copy geoname (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '${WORKPATH}/${TMPPATH}/allCountries.txt' null as ''; | |
ALTER TABLE ONLY geoname | |
ADD CONSTRAINT pk_geonameid PRIMARY KEY (geonameid); | |
vacuum analyze verbose geoname; | |
drop table quad_point_tbl; | |
drop table kd_point_tbl; | |
drop table gist_point_tbl; | |
drop table plain_point_tbl; | |
create table quad_point_tbl as select point(latitude, longitude) as p from geoname; | |
create table kd_point_tbl as select point(latitude, longitude) as p from geoname; | |
create table gist_point_tbl as select point(latitude, longitude) as p from geoname; | |
create table plain_point_tbl as select point(latitude, longitude) as p from geoname; | |
create index sp_quad_ind on quad_point_tbl using spgist(p); | |
create index sp_kd_ind on kd_point_tbl using spgist(p kd_point_ops); | |
create index gist_ind on gist_point_tbl using gist(p); | |
explain (analyze on, buffers on) select *, p <-> point(40,40) from quad_point_tbl order by p <-> point(40,40) limit 500; | |
explain (analyze on, buffers on) select *, p <-> point(40,40) from kd_point_tbl order by p <-> point(40,40) limit 500; | |
explain (analyze on, buffers on) select *, p <-> point(40,40) from gist_point_tbl order by p <-> point(40,40) limit 500; | |
explain (analyze on, buffers on) select *, p <-> point(40,40) from plain_point_tbl order by p <-> point(40,40) limit 500; | |
EOT | |
echo "'----- DONE" | |
/bin/date |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment