Compiling an extensive cities list from free geonames.org data
# This module provides a function to compile city data using the free data
# provided on http://download.geonames.org/export/dump.
# It uses the following files to compile a list of 4.4 million cities world-wide,
# including their name, state (administrative level 1), country, time zone,
# latitude, and longitude:
#
# * allCountries.txt (included in allCountries.zip)
# * countryInfo.txt
# * admin1CodesASCII.txt
#
defmodule Cities do
  @moduledoc """
  Namespace for compiling a city list from geonames.org export files.
  """

  defmodule Data do
    @moduledoc """
    Provides functions for compiling a raw data file.
    """

    # Every input file read by this module is tab-separated.
    @separator "\t"

    @doc """
    Compiles the needed raw city data from the given files.

    ## Parameters

      * `location_file` - path to the tab-separated locations dump
        (e.g. `allCountries.txt`); only rows whose feature class is `"P"`
        are kept.
      * `countries_file` - path to the country table (e.g. `countryInfo.txt`);
        lines starting with `#` are skipped, the first column is the lookup
        key and the fifth column the country name.
      * `states_file` - path to the admin-1 table (e.g. `admin1CodesASCII.txt`);
        the first column is the lookup key (`"<country>.<admin1>"`) and the
        second column the state name.
      * `output_file` - path of the file to write, defaults to `"cities.txt"`.

    ## Output

    One line per city, comma-separated: quoted city, state, country and time
    zone followed by the unquoted latitude and longitude. Double quotes inside
    a quoted value are doubled. A state or country code without an entry in
    the lookup tables is written as an empty quoted value.

    Returns the result of `Enum.into/2`, i.e. the output file stream.
    Raises if a file cannot be read/written or if a kept row does not have
    the expected number of columns.
    """
    def compile(location_file, countries_file, states_file, output_file \\ "cities.txt") do
      states = compile_states(states_file)
      countries = compile_countries(countries_file)

      location_file
      |> File.stream!()
      # Split each row exactly once; the filter and the mapper share the result.
      |> Stream.map(&String.split(&1, @separator))
      |> Stream.filter(&city?/1)
      |> Stream.map(fn fields ->
        fields
        |> to_attrs()
        |> replace_state(states)
        |> replace_country(countries)
        |> to_line()
      end)
      |> Enum.into(File.stream!(output_file, [:write]))
    end

    # A row is a city when its 7th column (feature class) is "P", the
    # geonames class for populated places (city, village, ...). Rows with
    # fewer columns simply do not match and are dropped.
    defp city?([_, _, _, _, _, _, "P" | _]), do: true
    defp city?(_fields), do: false

    # Picks the columns we need out of a fully split location row.
    # The match is assertive on purpose: a kept row with an unexpected
    # number of columns raises instead of producing shifted data.
    defp to_attrs([
           _geonameid,
           name,
           _asciiname,
           _alternatenames,
           latitude,
           longitude,
           _feature_class,
           _feature_code,
           country_code,
           _cc2,
           # admin-1 code; resolved to a display name by replace_state/2
           state,
           _admin2_code,
           _admin3_code,
           _admin4_code,
           _population,
           _elevation,
           _dem,
           timezone,
           # last column, still carries the line's trailing newline
           _modification_date
         ]) do
      %{
        city: name,
        state: state,
        country: country_code,
        timezone: timezone,
        latitude: latitude,
        longitude: longitude
      }
    end

    # Swaps the admin-1 code for its name. Must run before replace_country/2
    # because the lookup key is built from the still-unreplaced country code.
    defp replace_state(attrs, state_names) do
      %{attrs | state: Map.get(state_names, "#{attrs.country}.#{attrs.state}")}
    end

    # Builds a map of "<country>.<admin1>" => state name from the states file.
    defp compile_states(file) do
      file
      |> File.stream!()
      |> Stream.reject(&String.starts_with?(&1, "#"))
      |> Map.new(fn line ->
        [key, value | _] = String.split(line, @separator)
        {key, value}
      end)
    end

    # Swaps the country code for the country name (nil when unknown).
    defp replace_country(attrs, country_names) do
      %{attrs | country: Map.get(country_names, attrs.country)}
    end

    # Builds a map of country code => country name from the countries file.
    defp compile_countries(file) do
      file
      |> File.stream!()
      |> Stream.reject(&String.starts_with?(&1, "#"))
      |> Map.new(fn line ->
        [key, _, _, _, value | _] = String.split(line, @separator)
        {key, value}
      end)
    end

    # Renders one output line, terminated by a newline.
    defp to_line(attrs) do
      fields = [
        quote_field(attrs.city),
        quote_field(attrs.state),
        quote_field(attrs.country),
        quote_field(attrs.timezone),
        attrs.latitude,
        attrs.longitude
      ]

      Enum.join(fields, ",") <> "\n"
    end

    # Wraps a value in double quotes, doubling embedded quotes so that names
    # containing `"` do not break the row. `nil` becomes an empty value.
    defp quote_field(value) do
      escaped = value |> to_string() |> String.replace("\"", "\"\"")
      "\"#{escaped}\""
    end
  end
end
# Example usage: expects the three geonames files in the current directory
# and writes the compiled list to cities.txt.
Cities.Data.compile("allCountries.txt", "countryInfo.txt", "admin1CodesASCII.txt")
This comment has been minimized.
This comment has been minimized.
@dgrl you would need to install Elixir. Please have a look here for further details: https://elixir-lang.org/install.html |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Hi
What program do i need to run this script?
Regards