Skip to content

Instantly share code, notes, and snippets.

@lakshmanok
lakshmanok / calling_r.java
Last active August 25, 2016 23:32
Running R programs at scale using Dataflow
// Take the current element from the processing context as the numeric input for R.
double[] inputx = c.element(); // from input
// Bind the array in the script engine under the name "x" so the R program can read it.
engine.put("x", inputx);
// run R program, get output from R, send to Dataflow
// NOTE(review): the evaluation result is cast to ListVector without a type check;
// this assumes the R program's final value is a list -- confirm against the script.
ListVector result = (ListVector) engine.eval(new InputStreamReader(rprog));
// Read the list element at index 1 as a double.
// NOTE(review): presumably the p-value slot of the R result; verify the index base and position.
double pvalue = result.getElementAsDouble(1);
@lakshmanok
lakshmanok / random_sampling
Last active October 6, 2016 18:24
Random Sampling in BigQuery
SELECT
date,
airline,
departure_airport,
departure_schedule,
arrival_airport,
arrival_delay
FROM
[bigquery-samples:airline_ontime_data.flights]
WHERE
@lakshmanok
lakshmanok / repeatable_splitting
Last active October 6, 2016 18:37
Repeatable sampling in BigQuery
SELECT
date,
airline,
departure_airport,
departure_schedule,
arrival_airport,
arrival_delay
FROM
[bigquery-samples:airline_ontime_data.flights]
WHERE
SELECT
date,
airline,
departure_airport,
departure_schedule,
arrival_airport,
arrival_delay
FROM
[bigquery-samples:airline_ontime_data.flights]
WHERE
SELECT
date,
airline,
departure_airport,
departure_schedule,
arrival_airport,
arrival_delay
FROM
[bigquery-samples:airline_ontime_data.flights]
WHERE
SELECT
date,
airline,
departure_airport,
departure_schedule,
arrival_airport,
arrival_delay,
FROM
[bigquery-samples:airline_ontime_data.flights]
WHERE
@lakshmanok
lakshmanok / filterscenes
Created October 16, 2016 02:42
Filtering Landsat scenes
class SceneInfo:
def __init__ (self, line):
try:
self.SCENE_ID, self.SPACECRAFT_ID, self.SENSOR_ID, self.DATE_ACQUIRED, self.COLLECTION_NUMBER, self.COLLECTION_CATEGORY,self.DATA_TYPE, self.WRS_PATH, self.WRS_ROW, self.CLOUD_COVER, self.NORTH_LAT, self.SOUTH_LAT, self.WEST_LON, self.EAST_LON, self.TOTAL_SIZE, self.BASE_URL = line.split(',')
self.DATE_ACQUIRED = datetime.datetime.strptime(self.DATE_ACQUIRED, '%Y-%m-%d')
self.NORTH_LAT = float(self.NORTH_LAT)
self.SOUTH_LAT = float(self.SOUTH_LAT)
self.WEST_LON = float(self.WEST_LON)
self.EAST_LON = float(self.EAST_LON)
if __name__ == '__main__':
p = beam.Pipeline(options=beam.utils.options.PipelineOptions())
index_file = 'gs://gcp-public-data-landsat/index.csv.gz'
output_file = 'output.txt'
# center of Reunion Island
lat =-21.1; lon = 55.50
# Read the index file and find the best look
scenes = (p
def clearest(scenes):
    """Return the scene with the smallest ``CLOUD_COVER``, or ``None``.

    Args:
        scenes: Iterable of objects exposing a ``CLOUD_COVER`` attribute.
            May be ``None``, an empty container, or a lazy iterable
            (generator / iterator) that yields nothing.

    Returns:
        The element whose ``CLOUD_COVER`` compares lowest (the first such
        element on ties, as with ``min``), or ``None`` when there is nothing
        to choose from.
    """
    # Falsy input (None, empty list/tuple) has no candidates.
    if not scenes:
        return None
    try:
        # NOTE(review): values are compared as stored on the object; confirm
        # CLOUD_COVER is numeric rather than a string before relying on order.
        return min(scenes, key=lambda s: s.CLOUD_COVER)
    except ValueError:
        # Lazy iterables are always truthy, so the guard above cannot detect
        # that they are empty; min() reports an empty sequence via ValueError.
        return None
@lakshmanok
lakshmanok / landsat_area
Last active October 27, 2016 06:59
landsat_area
def clearest_look(allscenes, lat, lon, i, j):
    """Chain three labelled transforms onto ``allscenes`` for grid cell (i, j).

    The target point is ``lat + 0.25 * i`` / ``lon - 0.4 * j``. Each scene is
    passed through ``filterByLocation`` with that point, then through
    ``sceneByMonth``, and the results are combined per key with ``clearest``.
    Every step label carries the suffix ``pt_{i}x{j}``.

    NOTE(review): the helpers are defined outside this block; presumably
    ``filterByLocation`` keeps scenes covering the point and ``sceneByMonth``
    emits (month, scene) pairs -- confirm against their definitions.

    Returns:
        The result of applying the three transforms to ``allscenes``.
    """
    label = 'pt_{}x{}'.format(i, j)
    # Offsets depend only on the arguments, so compute them once up front.
    point_lat = lat + 0.25 * i
    point_lon = lon - 0.4 * j

    covering = allscenes | 'covers_{}'.format(label) >> beam.FlatMap(
        lambda scene: filterByLocation(scene, point_lat, point_lon))
    monthly = covering | 'bymonth_{}'.format(label) >> beam.FlatMap(
        lambda scene: sceneByMonth(scene))
    return monthly | 'clearest_{}'.format(label) >> beam.CombinePerKey(clearest)
def run():
# etc.