@mtanco · Created July 19, 2022
H2O Wave tutorial showing how to download a "big" file and keep the user updated on progress. Downloading from a URL is used as the specific example, but you should be able to adapt this to any data pull that runs asynchronously or arrives in chunks of bytes.
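To try it locally, save the script below (assuming a filename like app.py) and start it with `wave run app`; the app is then served by the Wave server, by default at http://localhost:10101/.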
import os

import certifi
import pandas as pd
import urllib3  # urllib3==1.26.10
from h2o_wave import Q, app, handle_on, main, on, ui  # h2o_wave==0.22.0
@app("/")
async def serve(q: Q):
if not q.client.initialized:
# Create the UI for a new browser tab
q.page["meta"] = ui.meta_card("")
q.page["example"] = ui.form_card(
box="1 1 3 2",
items=[
ui.button(name="download_file", label="Download File", primary=True),
],
)
q.client.initialized = True
await handle_on(q)
await q.page.save()


@on()
async def download_file(q: Q):
    # Your data-import specifics go here - start pulling data either asynchronously or a block of bytes at a time
    url = "https://h2o-public-test-data.s3.amazonaws.com/cc_fraud.csv"
    http = urllib3.PoolManager(cert_reqs="CERT_REQUIRED", ca_certs=certifi.where())
    r = http.request("GET", url, preload_content=False)  # preload_content=False streams the body instead of loading it all at once
    block_sz = 8192

    # Information we want to get from your data-import tool
    file_name = url.split("/")[-1]
    file_size = int(r.headers["Content-Length"])

    q.page["meta"].dialog = ui.dialog(
        title=f"Downloading: {file_name} Bytes: {file_size}",
        blocking=True,
        items=[
            ui.progress(
                label="",
                caption="0% complete",
                value=0,
            ),
        ],
    )

    count = 0
    with open(file_name, "wb") as f:
        # This loop runs until the entire file has downloaded
        while True:
            buffer = r.read(block_sz)
            if not buffer:
                # We have finished downloading the data
                q.page["meta"].dialog = None
                break
            f.write(buffer)

            # Our user will wait longer if we update the UI for every byte-block, so we only update it every
            # 250 iterations. If you are running an async job, you can replace the three uncommented lines
            # below with a progress update on a 2-second sleep timer (see the sketch after this script):
            # await update_progress_ui(q, file_name, file_size); await q.sleep(2)
            if count % 250 == 0:
                await update_progress_ui(q, file_name, file_size)
            count += 1
    r.release_conn()  # return the streaming connection to the pool

    # For this demo, confirm we have the dataset by displaying its first 10 rows
    df = pd.read_csv(file_name, nrows=10)
    q.page["file_rows"] = ui.form_card(
        box="1 3 -1 -1",
        items=[
            ui.table(
                name="my_data",
                columns=[ui.table_column(name=col, label=col) for col in df.columns.values],
                rows=[
                    ui.table_row(
                        name=str(i),
                        cells=[str(df[col].values[i]) for col in df.columns.values],
                    )
                    for i in range(len(df))
                ],
            )
        ],
    )


async def update_progress_ui(q: Q, file_name: str, file_size: int):
    # If your data-pull tool has an API for current size or percent complete, use that instead!
    # We use os.path.getsize() in this tutorial to stay agnostic to the data connector.
    file_size_dl = os.path.getsize(file_name)
    status = int(file_size_dl * 100.0 // file_size)

    # Update the progress bar
    q.page["meta"].dialog.items[0].progress.caption = f"{status}% complete"
    q.page["meta"].dialog.items[0].progress.value = status / 100
    await q.page.save()
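
The comment in the download loop mentions swapping the byte-block loop for a timer when the import runs as one asynchronous job. Below is a minimal sketch of that variant; it reuses update_progress_ui() from the script above, while start_import_job(), job.total_bytes, and job.done are hypothetical stand-ins for whatever your data-import tool provides.

@on()
async def download_file_async(q: Q):
    url = "https://h2o-public-test-data.s3.amazonaws.com/cc_fraud.csv"
    file_name = url.split("/")[-1]

    # Hypothetical helper: kicks off a background import that writes file_name
    # to disk and reports its expected size; swap in your data-import tool here
    job = start_import_job(url, file_name)
    file_size = job.total_bytes  # hypothetical attribute on the job object

    q.page["meta"].dialog = ui.dialog(
        title=f"Downloading: {file_name} Bytes: {file_size}",
        blocking=True,
        items=[ui.progress(label="", caption="0% complete", value=0)],
    )

    # Poll on a timer instead of once per byte-block; q.sleep() is Wave's
    # non-blocking sleep, so the event loop stays responsive while we wait
    while not job.done:  # hypothetical completion flag
        await update_progress_ui(q, file_name, file_size)
        await q.sleep(2)

    q.page["meta"].dialog = None
    await q.page.save()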