Skip to content

Instantly share code, notes, and snippets.

@tomkralidis
Last active November 7, 2019 02:48
Show Gist options
  • Save tomkralidis/3b6263ec9fbd84e6b50d79527dda149f to your computer and use it in GitHub Desktop.
Save tomkralidis/3b6263ec9fbd84e6b50d79527dda149f to your computer and use it in GitHub Desktop.
STAC Landsat 8 AWS to ES tooling in Python

Landsat 8 on AWS to ES tooling

curl http://landsat-pds.s3.amazonaws.com/c1/L8/scene_list.gz | gunzip > /tmp/scene_list
# download 1000 random JSON scene metadata
python landsat8-aws-es.py fetch /tmp/scene_list

# publish the JSON scene metadata in current directory as STAC records to an ES index
python landsat8-aws-es.py load ./

# see results in ES
curl "http://localhost:9200/landsat8-aws/_search?pretty=true" | more
import glob
import json
import random
import requests
import sys
from elasticsearch import Elasticsearch
LIMIT = 1000
def fetch_sample_records(lines):
for i in range(1, LIMIT):
line = random.choice(lines)
download_url = line.split(',')[-1]
scene_id = download_url.split('/')[8]
json_filename = '{}_MTL.json'.format(scene_id)
metadata_url = download_url.replace('index.html', json_filename)
print(metadata_url)
with open(json_filename, 'wb') as fh:
fh.write(requests.get(metadata_url).content)
def generate_stac_item(record):
minx = record['L1_METADATA_FILE']['PRODUCT_METADATA']['CORNER_LR_LON_PRODUCT']
miny = record['L1_METADATA_FILE']['PRODUCT_METADATA']['CORNER_LR_LAT_PRODUCT']
maxx = record['L1_METADATA_FILE']['PRODUCT_METADATA']['CORNER_UR_LON_PRODUCT']
maxy = record['L1_METADATA_FILE']['PRODUCT_METADATA']['CORNER_UR_LAT_PRODUCT']
datetime = '{}T{}'.format(record['L1_METADATA_FILE']['PRODUCT_METADATA']['DATE_ACQUIRED'], record['L1_METADATA_FILE']['PRODUCT_METADATA']['SCENE_CENTER_TIME'])
d = {
'id': record['L1_METADATA_FILE']['METADATA_FILE_INFO']['LANDSAT_SCENE_ID'],
'bbox': [minx, miny, maxx, maxy],
'geometry': {
'type': 'Polygon',
'coordinates': [[
[minx, miny],
[minx, maxy],
[maxx, maxy],
[maxx, miny],
[minx, miny]
]],
},
'type': 'Feature',
'properties': {
'datetime': datetime,
'eo:platform': record['L1_METADATA_FILE']['PRODUCT_METADATA']['SPACECRAFT_ID'],
'eo:instrument': record['L1_METADATA_FILE']['PRODUCT_METADATA']['SENSOR_ID'],
'eo:cloud_cover': record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']['CLOUD_COVER'],
'landsat:wrs_path': record['L1_METADATA_FILE']['PRODUCT_METADATA']['WRS_PATH'],
'landsat:wrs_row': record['L1_METADATA_FILE']['PRODUCT_METADATA']['WRS_ROW'],
'landsat:image_quality_oli': record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']['IMAGE_QUALITY_OLI'],
'landsat:earth_sun_distance': record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']['EARTH_SUN_DISTANCE'],
'landsat:scene_id': record['L1_METADATA_FILE']['METADATA_FILE_INFO']['LANDSAT_SCENE_ID'],
'landsat:product_id': record['L1_METADATA_FILE']['METADATA_FILE_INFO']['LANDSAT_PRODUCT_ID'],
'landsat:processingLevel': record['L1_METADATA_FILE']['PRODUCT_METADATA']['DATA_TYPE']
},
'assets': {}
}
if 'IMAGE_QUALITY_TIRS' in record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']:
d['properties']['eo:image_quality_tirs'] = record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']['IMAGE_QUALITY_TIRS']
url = 'https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/{}/{}/{}/{}'.format(
str(d['properties']['landsat:wrs_path']).zfill(3),
str(d['properties']['landsat:wrs_row']).zfill(3),
d['properties']['landsat:product_id'], d['properties']['landsat:product_id'])
thumb_small = '{}_thumb_small.jpg'.format(url)
thumb_large = '{}_thumb_large.jpg'.format(url)
d['assets']['thumbnail'] = {'href': thumb_small}
d['assets']['thumbnail_large'] = {'href': thumb_large}
for i in range(1, 13):
band = 'B{}'.format(i)
b_url = '{}_{}.TIF'.format(url, band)
d['assets'][band] = {
'href': b_url,
'type': 'image/tiff; application=geotiff; profile=cloud-optimized'
}
band_ovr = 'B{}_OVR'.format(i)
b_url_ovr = '{}_{}.TIF.ovr'.format(url, band)
d['assets'][band_ovr] = {
'href': b_url_ovr
}
return d
def load_es(dir_):
index_name = 'landsat8-aws'
type_name = 'FeatureCollection'
settings = {
'mappings': {
'FeatureCollection': {
'properties': {
'geometry': {
'type': 'geo_shape'
}
}
}
}
}
es = Elasticsearch()
if es.indices.exists(index_name):
es.indices.delete(index_name)
# create index
es.indices.create(index=index_name, body=settings, request_timeout=90)
for f in glob.glob('{}/*.json'.format(dir_)):
with open(f) as fh:
stac_item = generate_stac_item(json.load(fh))
try:
res = es.index(index=index_name, doc_type=type_name,
id=stac_item['id'], body=stac_item)
except Exception as err:
print(stac_item)
print(err)
if __name__ == '__main__':
if sys.argv[1] == 'fetch':
with open(sys.argv[2]) as fh:
lines = fh.read().splitlines()
fetch_sample_records(lines)
elif sys.argv[1] == 'load':
load_es(sys.argv[2])
else:
print('Usage: {} <fetch|load>'.format(sys.argv[0]))
sys.exit(2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment