Skip to content

Instantly share code, notes, and snippets.

import io
import boto3
from boto.s3.connection import S3Connection
import smart_open
def download_csv(bucket, path, iso):
# read and filter a single csv
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import os
import uuid
import hashlib
import shutil
# Names of all entries in the current directory whose extension is exactly
# ".tif" (the comparison is case-sensitive, so ".TIF" is not matched).
tif_list = [x for x in os.listdir(".") if os.path.splitext(x)[1] == ".tif"]
for tif in tif_list:
u = str(uuid.uuid4())
@mappingvermont
mappingvermont / convert.sh
Created July 25, 2018 18:00
Install orc and convert CSV --> ORC
# based on this guide: https://orc.apache.org/docs/building.html
# check out http://www.mirrorservice.org/sites/ftp.apache.org/orc/ to find the newest version
# download the release tarball into the current directory
# (-s silent, -S still report errors, -L follow redirects, -O keep the remote file name)
curl -sSLO http://www.mirrorservice.org/sites/ftp.apache.org/orc/orc-1.5.2/orc-1.5.2.tar.gz
# extract the archive (-x extract, -v list files as they are unpacked, -f archive to read)
tar -xvf orc-1.5.2.tar.gz
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import boto3
import json
# Use the credentials stored under the local AWS profile named 'gfwpro'.
session = boto3.Session(profile_name='gfwpro')
# AWS Lambda client bound to the us-east-1 region.
client = session.client('lambda', region_name='us-east-1')
# Area of interest: a GeoJSON FeatureCollection holding a single Polygon feature.
# NOTE(review): coordinates are presumably [lon, lat] pairs per GeoJSON convention - confirm.
aoi = {"features":[{"properties":{},"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[140.2137,-6.3999],[140.3078,-6.3685],[140.3249,-6.3924],[140.3249,-6.4606],[140.3064,-6.49],[140.2274,-6.4838],[140.2068,-6.434],[140.2137,-6.3999]]]}}],"crs":{},"type":"FeatureCollection"}
# then build an event to kick off the process
var AWS = require('aws-sdk');
var run = function(feat) {
var lambda = new AWS.Lambda({region: 'us-east-1'});
var params = {
FunctionName: 'geoproc-raster-umd', /* required */
Payload: JSON.stringify({'queryStringParameters': {'analysis': 'extent', 'thresh': 30}, 'body': {'geojson': {"type":"FeatureCollection","features":[feat]}}})}
lambda.invoke(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
@mappingvermont
mappingvermont / instructions.txt
Created June 26, 2018 19:05
Edit adm2.json country pages file to
Run split.sh to split your input file into 3 pieces - 2 small ones and one giant one.
Then output temp.json and find and replace all 0.0 values for area_loss and emissions with 0.0000001
Then create a file called `temp` containing only a closing brace `}`. The split process above
appears to drop the final `}` from the original JSON file, so it has to be added back.
Then combine all three files using head rather than `cat`: `head -c -1` drops the last byte (the trailing newline) of each file, whereas `cat` would keep those newlines, which we don't want:
head -c -1 -q temp.json remainder temp > adm2.json
@mappingvermont
mappingvermont / clean_primary_forest.py
Created June 26, 2018 14:31
Clean up issues with bound1 and bound2 errors for hadoop / country-pages output
import pandas as pd
import os
import sys
# Base name of the CSV to clean (without the .csv extension), taken from the
# first command-line argument.
csv_name = sys.argv[1]
# Input is read from data/<name>.csv; dst_csv is the matching path under final/.
src_csv = os.path.join('data', csv_name + '.csv')
dst_csv = os.path.join('final', csv_name + '.csv')
df = pd.read_csv(src_csv)
# Normalize the hyphenated 'primary-forest' label to 'primary_forest' in the
# polyname column; rows with any other polyname value are left unchanged.
df.loc[df['polyname'] == 'primary-forest', 'polyname'] = 'primary_forest'