Skip to content

Instantly share code, notes, and snippets.

Last active August 18, 2019 19:22
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
Compiling an extensive cities list from free data
# This module provides a function to compile city data using the free data
# provided on http://download.geonames.org/export/dump/.
# It uses the following files to compile a list of 4.4 million cities world-wide,
# including their name, state (administrative level 1), country, time zone,
# latitude, and longitude:
# * allCountries.txt (included in allCountries.zip)
# * countryInfo.txt
# * admin1CodesASCII.txt
defmodule Cities do
  defmodule Data do
    @moduledoc """
    Provides functions for compiling a raw data file.

    The inputs are the tab-separated GeoNames dumps `allCountries.txt`,
    `countryInfo.txt` and `admin1CodesASCII.txt`. The result is a
    comma-separated text file with one city per line.
    """

    # Name of the generated file; it is created in the current working directory.
    @output_file "cities.txt"

    # Zero-based column of the "feature class" field in a GeoNames location row.
    @feature_class_index 6

    @doc """
    Compiles the needed raw city data from the given files.

    Streams `location_file` line by line, keeps only rows whose feature class
    is `"P"`, replaces the admin-1 code and the country code with the display
    names found in `states_file` and `countries_file`, and writes one
    `city,state,country,timezone,latitude,longitude` line per city to
    `cities.txt`.

    Codes that have no entry in the lookup files are written as empty fields.

    Returns the `File.Stream` the lines were collected into. Raises
    `File.Error` if a file cannot be opened and `MatchError` if a kept row
    does not have the expected number of columns.
    """
    @spec compile(Path.t(), Path.t(), Path.t()) :: File.Stream.t()
    def compile(location_file, countries_file, states_file) do
      # A File.Stream is Collectable: Enum.into/2 opens it for writing and
      # closes it again once the pipeline is done, so no handle is leaked.
      new_file = File.stream!(@output_file, [:write])
      states = compile_states(states_file)
      countries = compile_countries(countries_file)

      location_file
      |> File.stream!()
      |> Stream.filter(&city?/1)
      |> Stream.map(&to_attrs/1)
      |> Stream.map(&replace_state(&1, states))
      |> Stream.map(&replace_country(&1, countries))
      |> Stream.map(&to_line/1)
      |> Enum.into(new_file)
    end

    # Returns true when the row's feature class is "P".
    defp city?(line) do
      type =
        line
        # Only split as far as the feature class; the rest of the row is not
        # needed to decide whether to keep it.
        |> String.split("\t", parts: @feature_class_index + 2)
        |> Enum.at(@feature_class_index)

      type == "P" # Populated place -> city, village, ...
    end

    # Extracts the needed columns of a location row into a map. The match is
    # deliberately assertive: a row with a different column count raises.
    defp to_attrs(line) do
      [_geonameid, # integer id of record in geonames database
       name, # name of geographical point (utf8) varchar(200)
       _asciiname, # name of geographical point in plain ascii characters, varchar(200)
       _alternatenames, # alternatenames, comma separated, ascii names automatically transliterated, convenience attribute from alternatename table, varchar(10000)
       latitude, # latitude in decimal degrees (wgs84)
       longitude, # longitude in decimal degrees (wgs84)
       _feature_class, # see http://www.geonames.org/export/codes.html, char(1)
       _feature_code, # see http://www.geonames.org/export/codes.html, varchar(10)
       country_code, # ISO-3166 2-letter country code, 2 characters
       _cc2, # alternate country codes, comma separated, ISO-3166 2-letter country code, 200 characters
       state, # fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
       _admin2_code, # code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
       _admin3_code, # code for third level administrative division, varchar(20)
       _admin4_code, # code for fourth level administrative division, varchar(20)
       _population, # bigint (8 byte int)
       _elevation, # in meters, integer
       _dem, # digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat.
       timezone, # the iana timezone id (see file timeZone.txt) varchar(40)
       _modification_date # date of last modification in yyyy-MM-dd format
      ] = String.split(line, "\t")

      %{city: name,
        state: state,
        country: country_code,
        timezone: timezone,
        latitude: latitude,
        longitude: longitude}
    end

    # Swaps the admin-1 code for its display name. The lookup key has the form
    # "<country code>.<admin1 code>", so this must run before replace_country/2
    # overwrites the country code.
    defp replace_state(attrs, state_names) do
      %{attrs | state: state_names["#{attrs.country}.#{attrs.state}"]}
    end

    # Builds a map of "<country code>.<admin1 code>" => state name.
    defp compile_states(file) do
      file
      |> data_lines()
      |> Enum.into(%{}, fn line ->
        [key, value | _] = String.split(line, "\t")
        {key, value}
      end)
    end

    # Swaps the ISO country code for the country's display name.
    defp replace_country(attrs, country_names) do
      %{attrs | country: country_names[attrs.country]}
    end

    # Builds a map of ISO country code => country name (fifth column).
    defp compile_countries(file) do
      file
      |> data_lines()
      |> Enum.into(%{}, fn line ->
        [key, _, _, _, value | _] = String.split(line, "\t")
        {key, value}
      end)
    end

    # Lazily streams the lines of a lookup file, skipping "#" comment lines and
    # blank lines.
    defp data_lines(file) do
      file
      |> File.stream!()
      |> Stream.reject(&(String.starts_with?(&1, "#") or String.trim(&1) == ""))
    end

    # Renders one output line.
    # NOTE(review): fields are joined without CSV quoting, so a name that
    # contains a comma yields an extra column - confirm consumers cope with it.
    defp to_line(attrs) do
      line = Enum.join([
        attrs.city,
        attrs.state,
        attrs.country,
        attrs.timezone,
        attrs.latitude,
        attrs.longitude
      ], ",")

      line <> "\n"
    end
  end
end
# Example usage; the three paths are relative, so they are resolved against the
# current working directory:
Cities.Data.compile("allCountries.txt", "countryInfo.txt", "admin1CodesASCII.txt")
Copy link

dgrl commented Aug 18, 2019


What program do I need to run this script?


Copy link

paulgoetze commented Aug 18, 2019

@dgrl you would need to install Elixir. Please have a look here for further details: https://elixir-lang.org/install.html

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment