Skip to content

Instantly share code, notes, and snippets.

@skeller88
Last active October 22, 2019 15:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skeller88/2a37ee0e3a6178ba429b711869b08921 to your computer and use it in GitHub Desktop.
Save skeller88/2a37ee0e3a6178ba429b711869b08921 to your computer and use it in GitHub Desktop.
import dask
import dask.array as da
from distributed import Client
import gcsfs
import imageio
import numpy as np
import time
def install():
    """Install the third-party packages the image-reading tasks depend on.

    This function is handed to ``client.run`` so that it executes inside
    remote worker processes; it therefore keeps its imports local and does
    not rely on any module-level state.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status,
            so a failed install surfaces immediately instead of showing up
            later as an ImportError inside a task.
    """
    import subprocess
    import sys

    # Invoke pip through the interpreter that is running this process so the
    # packages land in that interpreter's environment, not in whichever
    # ``pip`` happens to be first on PATH.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "gcsfs", "imageio"]
    )
# Address (host:port) of the Dask scheduler this script connects to.
SCHEDULER_ADDRESS = '35.197.27.240:8786'

# Connect to the scheduler, then run `install` on the workers so the
# libraries used by the read tasks are present before any task is submitted.
client = Client(SCHEDULER_ADDRESS)
client.run(install)
def read_filenames_from_gcs(filenames):
    """Read a batch of TIFF images from GCS and wrap them in one dask array.

    The images are fetched and decoded eagerly inside this call; only the
    returned wrapper is a dask array.

    Args:
        filenames: sequence of GCS object paths, each decodable as a TIFF.

    Returns:
        A dask array over the stacked images, with chunks
        ``(len(filenames), 120, 120)``.
        NOTE(review): the chunk shape assumes every image decodes to a 2-D
        120x120 array -- confirm against the data set.
    """
    # One filesystem handle serves the whole batch; constructing it inside
    # `read` repeated the same setup for every single file.
    fs = gcsfs.GCSFileSystem(project='big_earth')

    def read(filename):
        """Fetch one object's bytes and decode them as a TIFF image array."""
        raw = fs.cat(filename)
        return imageio.core.asarray(imageio.imread(raw, 'TIFF'))

    # Stack into a single ndarray explicitly rather than depending on
    # `da.from_array` to coerce a Python list of arrays.
    images = np.stack([read(filename) for filename in filenames])
    lazy_images = da.from_array(images, chunks=(len(filenames), 120, 120))
    return lazy_images
# List the bucket. `filenames` holds one entry per image folder;
# `small_filenames` is a second listing that nothing below reads.
fs = gcsfs.GCSFileSystem(project='big_earth')
filenames = fs.ls("big_earth/raw_rgb/tiff")
small_filenames = fs.ls("big_earth/raw_test")

# Build one object path per (folder, band) pair, of the form
# "<folder>/<folder name>_<band>.tif".
start = time.time()
image_paths = []
for path in filenames:
    # Normalise away any trailing "/" first. The previous construction only
    # produced a valid path when the listing entry ended in "/"; without it
    # the folder name was taken from the parent directory and no separator
    # was inserted. For entries that do end in "/" the result is unchanged.
    # NOTE(review): whether `fs.ls` returns folder entries with a trailing
    # slash looks version-dependent -- confirm for the deployed gcsfs.
    folder = path.rstrip('/')
    folder_name = folder.rsplit('/', 1)[-1]
    image_paths.extend(
        f"{folder}/{folder_name}_{band}.tif" for band in ("B02", "B03", "B04")
    )
t = time.time()
print('read image filenames', t - start)
# Submit the image paths to the cluster in fixed-size batches; each batch
# becomes one `read_filenames_from_gcs` task and `chunks` collects the
# resulting futures in submission order.
st = time.time()
chunk_size = 100
chunks = []
# Stepping with range() covers every path, including a final short batch.
# The previous while-loop stopped as soon as the end index reached
# len(image_paths), so the last batch was never submitted, and it computed
# the next end index from an undefined name, raising NameError after the
# first batch.
for offset in range(0, len(image_paths), chunk_size):
    is_first = offset == 0
    cst = time.time()
    chunk = image_paths[offset:offset + chunk_size]
    cst1 = time.time()
    # Timings are reported for the first batch only, as a quick sample.
    if is_first:
        print('loaded chunk in', cst1 - cst)
    chunks.append(client.submit(read_filenames_from_gcs, chunk))
    if is_first:
        print('submitted chunk in', time.time() - cst1)
print('completed in', time.time() - st)
# Resolve each submitted future to its dask array and persist that array
# through the client, keeping the persisted collections in submission order.
persisted_chunks = []
start = time.time()
for position, future in enumerate(chunks):
    is_first = position == 0
    # Only the first batch is timed individually.
    if is_first:
        first_started = time.time()
    persisted_chunks.append(client.persist(future.result()))
    if is_first:
        print('submitted chunk in', time.time() - first_started)
print('submitted all chunks in', time.time() - start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment