Skip to content

Instantly share code, notes, and snippets.

@kaspergrubbe
Last active September 2, 2024 04:30
Show Gist options
  • Save kaspergrubbe/b0f87329319e6442d676c0edbf635be9 to your computer and use it in GitHub Desktop.
Save kaspergrubbe/b0f87329319e6442d676c0edbf635be9 to your computer and use it in GitHub Desktop.
Import files from Geonames.com into a PostgreSQL database that runs Postgis

Assumes the following:

Can authenticate to database by using the username postgres without a password.

Does the following:

Setup:

Initialize database and run it:

pg_ctl init -D vendor/postgresql
postgres -D vendor/postgresql

Setup user:

psql -p 5432 -h localhost -d postgres -c "CREATE USER postgres SUPERUSER;"

Run the script:

bundle ruby geonames_postgres.rb

Observe that the thing is working:

Make a query:

SELECT * FROM geoname ORDER BY geometry <-> ST_GeogFromText('POINT(12.026589 55.952455)') LIMIT 1

It should tell you something like:

2618269	Kregme	Kregme	Kregme,Kregome	55.94439	12.03401	P	PPL	DK		17	260			0		25	Europe/Copenhagen	2012-01-17	0101000020E610000053793BC269112840B05582C5E1F84B40
#!/usr/bin/env ruby
require 'open3'
require 'fileutils'
def run_command(command)
puts("+: " + command)
Open3.popen2e(command) do |stdin, stdout_stderr, wait_thread|
Thread.new do
stdout_stderr.each {|l| puts l }
end
wait_thread.value
end
end
def create_temp_file_and_run_sql(sql)
file = Tempfile.new('tempsql')
begin
file.write(sql)
file.flush
run_command("psql --username=postgres -d geonames -f #{file.path}")
ensure
file.close
file.unlink
end
end
def download(directory)
FileUtils.mkdir_p(directory)
Dir.chdir(directory) do
%w(
http://download.geonames.org/export/dump/admin1CodesASCII.txt
http://download.geonames.org/export/dump/admin2Codes.txt
http://download.geonames.org/export/dump/allCountries.zip
http://download.geonames.org/export/dump/alternateNames.zip
http://download.geonames.org/export/dump/countryInfo.txt
http://download.geonames.org/export/dump/iso-languagecodes.txt
http://download.geonames.org/export/dump/featureCodes_en.txt
).each do |url|
filename = url.split('/').last
unzip = filename.split('.').last == 'zip'
run_command("curl -s -o #{filename} #{url}")
if unzip
run_command("unzip #{filename}")
FileUtils.rm(filename)
end
end
end
end
def patch(directory)
Dir.chdir(directory) do
countryinfo = File.open('countryInfo.txt').read
new_lines = []
countryinfo.lines.each do |line|
next if line.start_with?('#')
new_lines << line
end
File.open('countryInfo_patched.txt', 'w+').write(new_lines.join)
featurecodes = File.open('featureCodes_en.txt').read
new_lines = []
featurecodes.lines.each do |line|
next if line.start_with?('null')
fcode, label, description = line.split("\t")
code, fclass = fcode.split('.')
new_lines << [code, fclass, fcode, label, description].join("\t")
end
File.open('featureCodes_en_patched.txt', 'w+').write(new_lines.join)
languagecodes = File.open('iso-languagecodes.txt').read
first_line = languagecodes.lines.first
new_lines = []
languagecodes.lines.each do |line|
next if line == first_line
new_lines << line
end
File.open('iso-languagecodes_patched.txt', 'w+').write(new_lines.join)
admin1codes = File.open('admin1CodesASCII.txt').read
new_lines = []
admin1codes.lines.each do |line|
code, name, alt_name_english, geonameid = line.split("\t")
countrycode, admin1_code = code.split('.')
new_lines << [code, countrycode, admin1_code, name, alt_name_english, geonameid].join("\t")
end
File.open('admin1CodesASCII_patched.txt', 'w+').write(new_lines.join)
admin2codes = File.open('admin2Codes.txt').read
new_lines = []
admin2codes.lines.each do |line|
code, name, alt_name_english, geonameid = line.split("\t")
countrycode, admin1_code, _ = code.split('.')
new_lines << [code, countrycode, admin1_code, name, alt_name_english, geonameid].join("\t")
end
File.open('admin2Codes_patched.txt', 'w+').write(new_lines.join)
end
end
def setup_database
run_command("createdb --username=postgres geonames")
run_command("createlang --username=postgres plpgsql geonames")
%w(
/usr/local/share/postgis/postgis.sql
/usr/local/share/postgis/spatial_ref_sys.sql
/usr/local/share/postgis/postgis_comments.sql
).each do |postgis_file|
raise "Postgis file don't exist: #{postgis_file}! :(" unless File.exist?(postgis_file)
run_command("psql --username=postgres -d geonames -f #{postgis_file}")
end
end
def setup_tables
setup_queries = "CREATE EXTENSION \"postgis\";
CREATE TABLE geoname (
geonameid int,
name varchar(200),
asciiname varchar(200),
alternatenames varchar(10000),
latitude float,
longitude float,
fclass char(1),
fcode varchar(10),
country varchar(2),
cc2 varchar(100),
admin1 varchar(20),
admin2 varchar(80),
admin3 varchar(20),
admin4 varchar(20),
population bigint,
elevation int,
gtopo30 int,
timezone varchar(40),
moddate date
);
ALTER TABLE ONLY geoname ADD CONSTRAINT pk_geonameid PRIMARY KEY (geonameid);
CREATE INDEX index_geoname_on_name ON geoname USING btree (name);
CREATE TABLE alternatename (
alternatenameid int,
geonameid int,
isoLanguage varchar(7),
alternatename varchar(200),
ispreferredname boolean,
isshortname boolean,
iscolloquial boolean,
ishistoric boolean
);
ALTER TABLE ONLY alternatename ADD CONSTRAINT pk_alternatenameid PRIMARY KEY (alternatenameid);
CREATE TABLE admin1codes (
code varchar(11),
countrycode char(3),
admin1_code varchar(10),
name varchar(200),
alt_name_english varchar(200),
geonameid int
);
ALTER TABLE ONLY admin1codes ADD CONSTRAINT pk_admin1id PRIMARY KEY (geonameid);
CREATE TABLE admin2codes (
code varchar(50),
countrycode char(2),
admin1_code varchar(11),
name varchar(200),
alt_name_english varchar(200),
geonameid int
);
ALTER TABLE ONLY admin2codes ADD CONSTRAINT pk_admin2id PRIMARY KEY (geonameid);
CREATE TABLE countryinfo (
iso_alpha2 char(2),
iso_alpha3 char(3),
iso_numeric integer,
fips_code varchar(3),
name varchar(200),
capital varchar(200),
areainsqkm double precision,
population integer,
continent varchar(2),
tld varchar(10),
currencycode varchar(3),
currencyname varchar(20),
phone varchar(20),
postalcode varchar(100),
postalcoderegex varchar(200),
languages varchar(200),
geonameid int,
neighbors varchar(50),
equivfipscode varchar(3)
);
ALTER TABLE ONLY countryinfo ADD CONSTRAINT pk_iso_alpha2 PRIMARY KEY (iso_alpha2);
CREATE TABLE featurecodes (
code varchar(1),
class varchar(10),
fcode varchar (10),
label varchar(100),
description varchar(1000)
);
ALTER TABLE ONLY featurecodes ADD CONSTRAINT pk_fcode PRIMARY KEY (fcode);
CREATE TABLE languagecodes (
iso_639_3 varchar(10),
iso_639_2 varchar(10),
iso_639_1 varchar(2),
name varchar(1000)
);
ALTER TABLE ONLY languagecodes ADD CONSTRAINT pk_languageid PRIMARY KEY (iso_639_3);
ALTER TABLE ONLY countryinfo ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid);
ALTER TABLE ONLY alternatename ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid);
"
create_temp_file_and_run_sql(setup_queries)
end
def populate(directory)
directory = File.join(Dir.pwd, directory)
queries = []
queries << "copy geoname (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '#{directory}/allCountries.txt' null as '';"
queries << "copy alternatename (alternatenameid,geonameid,isolanguage,alternatename,ispreferredname,isshortname,iscolloquial,ishistoric) from '#{directory}/alternateNames.txt' null as '';"
queries << "copy countryinfo (iso_alpha2,iso_alpha3,iso_numeric,fips_code,name,capital,areainsqkm,population,continent,tld,currencycode,currencyname,phone,postalcode,postalcoderegex,languages,geonameid,neighbors,equivfipscode) from '#{directory}/countryInfo_patched.txt' null as '';"
queries << "copy featurecodes (code, class, fcode, label, description) from '#{directory}/featureCodes_en_patched.txt' null as '';"
queries << "copy languagecodes (iso_639_3, iso_639_2, iso_639_1, name) from '#{directory}/iso-languagecodes_patched.txt' null as '';"
queries << "copy admin1codes (code, countrycode, admin1_code, name, alt_name_english, geonameid) from '#{directory}/admin1CodesASCII_patched.txt' null as '';"
queries << "copy admin2codes (code, countrycode, admin1_code, name, alt_name_english, geonameid) from '#{directory}/admin2Codes_patched.txt' null as '';"
queries.each do |populate_sql|
create_temp_file_and_run_sql(populate_sql)
end
end
def create_geometry
geometry = "SELECT AddGeometryColumn ('public','geoname','geometry',4326,'POINT',2);
UPDATE geoname SET geometry = ST_PointFromText('POINT(' || longitude || ' ' || latitude || ')', 4326);
CREATE INDEX idx_geoname_geometry ON public.geoname USING gist(geometry);"
create_temp_file_and_run_sql(geometry)
end
download('tmp_geonames')
patch('tmp_geonames')
setup_database
setup_tables
populate('tmp_geonames')
create_geometry
@goterps2018
Copy link

getting the following error when running the script, can you please help?

geonames_postgres.rb:120: syntax error, unexpected tIDENTIFIER, expecting keyword_do or '{' or '('
run_command( -d geonames -f #{postgis_file}")
^~~~~~~~

@kaspergrubbe
Copy link
Author

@goterps2018 Did you copy the script correctly? That line is missing some interpolation.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment