Skip to content

Instantly share code, notes, and snippets.

@sgibbes
Created October 12, 2017 17:08
Show Gist options
  • Save sgibbes/684d88cee14c06f4de55ba7bc75d6f38 to your computer and use it in GitHub Desktop.
Save sgibbes/684d88cee14c06f4de55ba7bc75d6f38 to your computer and use it in GitHub Desktop.
Find the tiles common to the TSV-formatted WDPA and IFL datasets, and download them to a local directory.
from boto.s3.connection import S3Connection
import boto3
import os
# S3 bucket and key "folder" that hold the per-tile tsv files, named
# <boundary>__<tile id>.tsv
BUCKET_NAME = 'gfw2-data'
KEY_PREFIX = 'sam'
BOUNDARIES = ('ifl', 'wdpa')
TSV_SUFFIX = '.tsv'


def tile_id_from_filename(filename, boundary):
    """Return the tile id encoded in ``<boundary>__<tile id>.tsv``.

    Returns None when ``filename`` does not belong to ``boundary``, is not a
    tsv file, or carries an empty tile id.
    """
    prefix = boundary + '__'
    if not (filename.startswith(prefix) and filename.endswith(TSV_SUFFIX)):
        return None
    # Slice the suffix off instead of using str.strip(".tsv"): strip() removes
    # any leading/trailing '.', 't', 's' or 'v' characters and would mangle
    # tile ids that start or end with one of them.
    tile_id = filename[len(prefix):-len(TSV_SUFFIX)]
    return tile_id or None


def tiles_by_boundary(key_names):
    """Build ``{boundary name: set of tile ids}`` from full S3 key names.

    Only keys located directly under ``KEY_PREFIX/`` are considered, because
    the download step rebuilds keys as ``KEY_PREFIX/<boundary>__<tile>.tsv``;
    a listing by prefix alone can also return keys from sibling or nested
    folders that merely start with the same characters.
    """
    tiles = dict((boundary, set()) for boundary in BOUNDARIES)
    for key_name in key_names:
        # unpack the folder and the filename from the full key
        folder, _, filename = key_name.rpartition('/')
        if folder != KEY_PREFIX:
            continue
        for boundary in BOUNDARIES:
            tile_id = tile_id_from_filename(filename, boundary)
            if tile_id is not None:
                tiles[boundary].add(tile_id)
    return tiles


def common_tiles(tiles):
    """Return the sorted tile ids that are present for every boundary."""
    shared = None
    for boundary in BOUNDARIES:
        if shared is None:
            shared = set(tiles[boundary])
        else:
            shared &= tiles[boundary]
    return sorted(shared or ())


def main():
    """Download the tsv files of every tile that exists for both boundaries.

    Files are written to ``<boundary>/<boundary>__<tile id>.tsv`` relative to
    the current working directory; the boundary directories are created when
    missing.
    """
    # connect to the s3 bucket
    conn = S3Connection(host="s3.amazonaws.com")
    bucket = conn.get_bucket(BUCKET_NAME)
    s3 = boto3.resource('s3')

    # collect the file names in the bucket and group tile ids per boundary
    key_names = [key.name for key in bucket.list(prefix=KEY_PREFIX)]
    same = common_tiles(tiles_by_boundary(key_names))

    # download the tsv files that are the same in both
    download_bucket = s3.Bucket(BUCKET_NAME)
    for bound in BOUNDARIES:
        if not os.path.exists(bound):
            os.mkdir(bound)
        for tileid in same:
            filename = '{0}__{1}.tsv'.format(bound, tileid)
            download_bucket.download_file(
                '{0}/{1}'.format(KEY_PREFIX, filename),
                '{0}/{1}'.format(bound, filename))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment