Skip to content

Instantly share code, notes, and snippets.

@yjhjstz
Forked from spolakh/geogres-knn-benchmark
Created April 29, 2019 07:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yjhjstz/01173bf3252a7037f5aeb59a5ebe71cb to your computer and use it in GitHub Desktop.
Save yjhjstz/01173bf3252a7037f5aeb59a5ebe71cb to your computer and use it in GitHub Desktop.
This script imports geonames into postgres for the purposes of knn-spgist vs knn-gist benchmarking. Based on the script from geonames forums: http://forum.geonames.org/gforum/posts/list/15/926.page
#!/bin/bash
#===============================================================================
#
# FILE: getgeo.sh
#
# USAGE: ./getgeo.sh
#
# DESCRIPTION: run the script so that the geodata will be downloaded and inserted into your
# database
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Andreas (aka Harpagophyt )
# COMPANY: <a href="http://forum.geonames.org/gforum/posts/list/926.page" target="_blank" rel="nofollow">http://forum.geonames.org/gforum/posts/list/926.page</a>
# VERSION: 1.4
# CREATED: 07/06/2008
# REVISION: 1.1 2008-06-07 replace COPY continentCodes through INSERT statements.
# 1.2 2008-11-25 Adjusted by Bastiaan Wakkie in order to not unnessisarily
# download.
# 1.3 2011-08-07 Updated script with tree changes. Removes 2 obsolete records from "countryinfo" dump image,
# updated timeZones table with raw_offset and updated postalcode to varchar(20).
# 1.4 2012-03-31 Don Drake - Add FKs after data is loaded, also vacuum analyze tables to ensure FK lookups use PK
# - Don't unzip text files
# - added DROP TABLE
# 1.5 2012-06-30 Furdui Marian - added CountryCode to TimeZones and updated geonames.alternatenames to varchar(8000)
#===============================================================================
/bin/date
WORKPATH="${HOME}/tmp/GIS/gisnames/geodata"
TMPPATH="tmp"
PCPATH="pc"
PREFIX="_"
DBHOST="127.0.0.1"
DBPORT="5432"
DBUSER="postgres"
FILES="allCountries.zip"
psql -U $DBUSER -h $DBHOST -p $DBPORT -c "CREATE DATABASE geonames WITH TEMPLATE = template0 ENCODING = 'UTF8';"
psql -U $DBUSER -h $DBHOST -p $DBPORT geonames <<EOT
DROP TABLE geoname CASCADE;
CREATE TABLE geoname (
geonameid int,
name varchar(200),
asciiname varchar(200),
alternatenames varchar,
latitude float,
longitude float,
fclass char(1),
fcode varchar(10),
country varchar(2),
cc2 varchar(60),
admin1 varchar(20),
admin2 varchar(80),
admin3 varchar(20),
admin4 varchar(20),
population bigint,
elevation int,
gtopo30 int,
timezone varchar(40),
moddate date
);
EOT
# check if needed directories do already exsist
if [ -d "$WORKPATH" ]; then
echo "$WORKPATH exists..."
sleep 0
else
echo "$WORKPATH and subdirectories will be created..."
mkdir -p $WORKPATH
mkdir -p $WORKPATH/$TMPPATH
mkdir -p $WORKPATH/$PCPATH
echo "created $WORKPATH"
fi
echo
echo ",---- STARTING (downloading, unpacking and preparing)"
cd $WORKPATH/$TMPPATH
for i in $FILES
do
wget -N -q "http://download.geonames.org/export/dump/$i" # get newer files
if [ $i -nt $PREFIX$i ] || [ ! -e $PREFIX$i ] ; then
cp -p $i $PREFIX$i
if [ `expr index zip $i` -eq 1 ]; then
unzip -o -u -q $i
fi
echo "| $i has been downloaded";
else
echo "| $i is already the latest version"
fi
done
# download the postalcodes. You must know yourself the url
cd $WORKPATH/$PCPATH
wget -q -N "http://download.geonames.org/export/zip/allCountries.zip"
if [ $WORKPATH/$PCPATH/allCountries.zip -nt $WORKPATH/$PCPATH/allCountries$PREFIX.zip ] || [ ! -e $WORKPATH/$PCPATH/allCountries.zip ]; then
echo "Attempt to unzip $WORKPATH/$PCPATH/allCountries.zip file..."
unzip -o -u -q $WORKPATH/$PCPATH/allCountries.zip
cp -p $WORKPATH/$PCPATH/allCountries.zip $WORKPATH/$PCPATH/allCountries$PREFIX.zip
echo "| ....zip has been downloaded"
else
echo "| ....zip is already the latest version"
fi
echo "+---- FILL DATABASE ( this takes 2 days on my machine )"
psql -e -U $DBUSER -h $DBHOST -p $DBPORT geonames <<EOT
copy geoname (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '${WORKPATH}/${TMPPATH}/allCountries.txt' null as '';
ALTER TABLE ONLY geoname
ADD CONSTRAINT pk_geonameid PRIMARY KEY (geonameid);
vacuum analyze verbose geoname;
drop table quad_point_tbl;
drop table kd_point_tbl;
drop table gist_point_tbl;
drop table plain_point_tbl;
create table quad_point_tbl as select point(latitude, longitude) as p from geoname;
create table kd_point_tbl as select point(latitude, longitude) as p from geoname;
create table gist_point_tbl as select point(latitude, longitude) as p from geoname;
create table plain_point_tbl as select point(latitude, longitude) as p from geoname;
create index sp_quad_ind on quad_point_tbl using spgist(p);
create index sp_kd_ind on kd_point_tbl using spgist(p kd_point_ops);
create index gist_ind on gist_point_tbl using gist(p);
explain (analyze on, buffers on) select *, p <-> point(40,40) from quad_point_tbl order by p <-> point(40,40) limit 500;
explain (analyze on, buffers on) select *, p <-> point(40,40) from kd_point_tbl order by p <-> point(40,40) limit 500;
explain (analyze on, buffers on) select *, p <-> point(40,40) from gist_point_tbl order by p <-> point(40,40) limit 500;
explain (analyze on, buffers on) select *, p <-> point(40,40) from plain_point_tbl order by p <-> point(40,40) limit 500;
EOT
echo "'----- DONE"
/bin/date
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment