Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
earth-engine-to-bigquery
#!/bin/sh
# Download all WorldPop population rasters for one year from Google Earth
# Engine as local GeoTIFFs.
# Requires GDAL built with the EEDA/EEDAI drivers (ogrinfo, gdal_translate)
# and a Google Cloud service-account key file.
CREDENTIALS="credentials.json"
COLLECTION="projects/earthengine-public/assets/WorldPop/GP/100m/pop"
YEAR=2020
# Presumably read by GDAL's EEDA driver to authenticate against Earth Engine.
export "GOOGLE_APPLICATION_CREDENTIALS=$CREDENTIALS"
# List the collection filtered by year and extract each image's
# 'gdal_dataset' field value (one dataset identifier per line).
IMAGES=$(ogrinfo -ro -al "EEDA:" -oo "COLLECTION=$COLLECTION" -where "year=$YEAR" | grep 'gdal_dataset (String) = ' | cut -d '=' -f2 | tr -d ' ')
# $IMAGES is deliberately left unquoted so the shell word-splits it into
# one dataset identifier per loop iteration.
for IMAGE in $IMAGES
do
# Use the last path component of the dataset id as the output file name.
NAME=$(basename "$IMAGE")
echo "Fetching Image: $IMAGE -> ${NAME}"
# Alternative NetCDF output, kept for reference:
#gdal_translate -of NetCDF "$IMAGE" "${NAME}.nc"
gdal_translate "$IMAGE" "${NAME}.tif"
done
#!/usr/bin/env python3
# See: https://medium.com/@lakshmanok/how-to-load-geojson-files-into-bigquery-gis-9dc009802fb4
import os
import sys
import json
#import glob
#for fname in glob.glob('*.geojson'):
def geojson_to_ndjson(fname):
    """Convert one GeoJSON FeatureCollection file to newline-delimited JSON.

    Writes ``<name>.json`` next to the input, one JSON object per line
    (the format BigQuery expects for JSON loads). Each line holds a
    feature's properties, with the geometry embedded as a JSON string
    under the ``'geometry'`` key.

    Args:
        fname: path to a ``.geojson`` file containing a FeatureCollection.
    """
    name = os.path.splitext(fname)[0]
    print(fname, name)
    with open(fname, 'r') as ifp, open(f'{name}.json', 'w') as ofp:
        features = json.load(ifp)['features']
        for obj in features:
            props = obj['properties']
            # NOTE(review): this keeps only coordinates[0] and labels the
            # result 'Polygon' — it assumes single-ring polygon inputs.
            # TODO confirm against the producing script's geometries.
            props['geometry'] = json.dumps({
                'type': 'Polygon',
                'coordinates': obj['geometry']['coordinates'][0]
            })
            # BUG FIX: the original printed the dict directly, emitting a
            # Python repr with single quotes — not valid JSON, so BigQuery
            # would reject every line. Serialize with json.dumps instead.
            print(json.dumps(props), file=ofp)


if __name__ == '__main__':
    # One input file per command-line argument, as before.
    for fname in sys.argv[1:]:
        geojson_to_ndjson(fname)
#!/usr/bin/env python3
import os
import sys
#import glob
import xarray as xr
import numpy as np
import geopandas as gpd
from shapely.geometry import box
import json
#from geolib import geohash
import geohash
# Sanity-check the geohash library against two published reference values
# before doing any work.
# PostGIS example from https://postgis.net/docs/ST_GeoHash.html
assert (geohash.encode(48, -126, 20) == 'c0w3hf1s70w3hf1s70w3')
# Google BigQuery example from https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_geohash
assert (geohash.encode(47.62, -122.35, 10) == 'c22yzugqw7')

# Convert every GeoTIFF named on the command line into one or more
# GeoJSON files (one per 1000-row band of the raster).
for fname in sys.argv[1:]:
    name = os.path.splitext(fname)[0]
    print (fname, name)
    with xr.open_rasterio(fname).squeeze(drop=True) as da:
        # raster pixel resolution
        yres, xres = da.res
        # process the raster in fixed-height row bands to limit RAM use
        chunksize = 1000
        nrows = da.y.shape[0]
        chunks = int(np.round(nrows / chunksize + 0.5))
        print (f'{chunks} chunks:', end=' ', flush=True)
        for idx in range(chunks):
            print (idx + 1, end=' ', flush=True)
            # one band of rows -> flat dataframe of (y, x, population)
            band = da[idx * chunksize:(idx + 1) * chunksize]
            df = band.to_dataframe(name='population').reset_index()
            # keep only valid (positive-population) pixels
            df = df[df['population'] > 0]
            if df.empty:
                # nothing usable in this band — skip the file entirely
                continue
            # tag every row with the source raster's name
            df['id'] = name
            # geohash of the pixel centre point
            df['geohash'] = df.apply(lambda row: geohash.encode(row.y, row.x, 10), axis=1)
            # expand each pixel centre to a bounding box:
            # box(minx, miny, maxx, maxy)
            df['geometry'] = df.apply(lambda row: box(row.x - xres, row.y - yres, row.x + xres, row.y + yres), axis=1)
            # the box geometry supersedes the raw coordinates
            del df['x']
            del df['y']
            # promote to a geodataframe and reproject to WGS84 lon/lat
            gdf = gpd.GeoDataFrame(df).set_crs(da.crs.replace('+init=', '')).to_crs('epsg:4326')
            gdf.to_file(f'{name}_{idx + 1:02}.geojson', driver='GeoJSON', index=False)
        # terminate the progress line for this file
        print ('', flush=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment