Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Import GeoNames populated places into a dataset. Source data: http://download.geonames.org/export/dump/
#!/usr/bin/python3
import pandas as pd
import numpy as np
# Column-position -> dtype mapping for the headerless, tab-separated GeoNames
# dump. The trailing comments give the field name of each position.
#
# Integer columns are pinned to 64 bits: np.dtype(int) follows the platform's
# default integer, which is only 32 bits wide on some platforms/NumPy
# versions, while the GeoNames readme documents population as an 8-byte
# integer. Code-like columns stay strings so values such as "01" keep their
# leading zeros.
dtypes_dict = {
    0: np.dtype(np.int64),  # geonameid
    1: np.dtype(str),  # name
    2: np.dtype(str),  # asciiname
    3: np.dtype(str),  # alternatenames
    4: np.dtype(float),  # latitude
    5: np.dtype(float),  # longitude
    6: np.dtype(str),  # feature class
    7: np.dtype(str),  # feature code
    8: np.dtype(str),  # country code
    9: np.dtype(str),  # cc2
    10: np.dtype(str),  # admin1 code
    11: np.dtype(str),  # admin2 code
    12: np.dtype(str),  # admin3 code
    13: np.dtype(str),  # admin4 code
    14: np.dtype(np.int64),  # population
    15: np.dtype(float),  # elevation
    16: np.dtype(str),  # dem (digital elevation model)
    17: np.dtype(str),  # timezone
    18: np.dtype(str),  # modification date yyyy-MM-dd
}
def extract_populated_places(source='allCountries.txt', target='geonames.csv',
                             dtypes=None, chunksize=1000000):
    """Write the populated places of a GeoNames dump to a compact CSV.

    Reads the tab-separated, headerless dump at ``source`` in chunks, keeps
    the rows whose feature class (column 6) is ``'P'``, and writes latitude,
    longitude, name, admin1 code, admin2 code and country code to ``target``
    under the header ``lat,lon,name,admin1,admin2,cc``.

    Args:
        source: Path of the GeoNames dump to read.
        target: Path of the CSV file to write.
        dtypes: Column-position -> dtype mapping handed to ``pd.read_csv``.
            Defaults to the module-level ``dtypes_dict``.
        chunksize: Number of rows parsed per chunk.

    Returns:
        The DataFrame that was written to ``target``.
    """
    import csv  # local import: only needed for the QUOTE_NONE constant

    if dtypes is None:
        dtypes = dtypes_dict

    reader = pd.read_csv(
        source,
        sep="\t",
        header=None,
        dtype=dtypes,
        chunksize=chunksize,
        # The dump is not quoted; a name starting with '"' must be read
        # literally instead of opening a quoted field that swallows the
        # following tabs and lines.
        quoting=csv.QUOTE_NONE,
        # Only an empty field means "missing". With pandas' default NA list,
        # literal strings such as "NA" (Namibia's country code), "nan" or
        # "null" would be turned into NaN and end up blank in the output.
        keep_default_na=False,
        na_values=[''],
    )

    # Filter each chunk right away so only the class-'P' rows stay in memory,
    # rather than concatenating the whole dump before filtering.
    populated = [chunk[chunk[6] == 'P'] for chunk in reader]
    places = pd.concat(populated)

    dataset = places.filter([4, 5, 1, 10, 11, 8])
    dataset.to_csv(
        target,
        header=['lat', 'lon', 'name', 'admin1', 'admin2', 'cc'],
        sep=',',
        index=False,
    )
    return dataset


if __name__ == "__main__":
    extract_populated_places()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment