Created
July 19, 2022 21:59
-
-
Save mtanco/2fe2f4a8a6ca932cd33d5eb9ee11e88d to your computer and use it in GitHub Desktop.
H2O Wave tutorial showing how to download a "big" file while keeping the user updated on the progress. Downloading from a URL is used as the specific example, but you should be able to adapt this to any data pull that is asynchronous or transferred in chunks of bytes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import certifi | |
import pandas as pd | |
import urllib3 # urllib3==1.26.10 | |
from h2o_wave import Q, app, handle_on, main, on, ui # h2o_wave==0.22.0 | |
@app("/")
async def serve(q: Q):
    """Handle every browser event for the app mounted at "/".

    The first request from a browser tab lays out the static cards; every
    request is then passed to ``handle_on`` and the page is saved so pending
    changes reach the browser.
    """
    is_new_tab = not q.client.initialized
    if is_new_tab:
        # Create the UI for a new browser tab
        download_button = ui.button(
            name="download_file", label="Download File", primary=True
        )
        q.page["meta"] = ui.meta_card("")
        q.page["example"] = ui.form_card(box="1 1 3 2", items=[download_button])
        q.client.initialized = True

    await handle_on(q)
    await q.page.save()
@on()
async def download_file(q: Q):
    """Download a CSV from a URL while reporting progress in a blocking dialog.

    The response body is streamed to a file in the current working directory
    in fixed-size blocks, the progress dialog is refreshed periodically, and
    the first 10 rows of the downloaded file are shown in a table card.

    Raises:
        KeyError: if the response has no ``Content-Length`` header.
    """
    # Your data-import specifications go here - start pulling data either as async or bytes at a time
    url = "https://h2o-public-test-data.s3.amazonaws.com/cc_fraud.csv"
    block_sz = 8192
    # Our user will wait longer if we update the UI for every byte-block, so we only update it every
    # 250 iterations
    update_every = 250

    http = urllib3.PoolManager(cert_reqs="CERT_REQUIRED", ca_certs=certifi.where())
    r = http.request("GET", url, preload_content=False)
    try:
        # Information we want to get from your data-import tool
        file_name = url.split("/")[-1]
        file_size = int(r.headers["Content-Length"])

        q.page["meta"].dialog = ui.dialog(
            title=f"Downloading: {file_name} Bytes: {file_size}",
            blocking=True,
            items=[
                ui.progress(
                    label="",
                    caption="0% complete",
                    value=0,
                ),
            ],
        )

        # NOTE(review): r.read() is a synchronous call, so the event loop is blocked while each
        # block is fetched - consider offloading it to a worker thread for production use.
        with open(file_name, "wb") as f:
            count = 0
            # This loop runs until your entire file has downloaded
            while True:
                buffer = r.read(block_sz)
                if not buffer:
                    # we have finished downloading the data
                    break

                f.write(buffer)

                # if you are running an async job you can replace the periodic update below with a
                # 2-second sleep timer:
                # await update_progress_ui(q, file_name, file_size); await q.sleep(2)
                if count % update_every == 0:
                    # Progress is measured from the size on disk, so push buffered bytes out first.
                    f.flush()
                    await update_progress_ui(q, file_name, file_size)
                count += 1
    finally:
        # Hand the streamed connection back to the pool whether or not the download succeeded.
        r.release_conn()
        # Always remove the blocking dialog; otherwise a failed download leaves the user stuck behind it.
        q.page["meta"].dialog = None
        await q.page.save()

    # For this demo, we show we have the dataset
    df = pd.read_csv(file_name, nrows=10)
    column_names = df.columns.values
    # Fetch each column's values once instead of re-indexing the frame for every cell.
    column_values = [df[col].values for col in column_names]

    q.page["file_rows"] = ui.form_card(
        box="1 3 -1 -1",
        items=[
            ui.table(
                name="my_data",
                columns=[ui.table_column(col, col) for col in column_names],
                rows=[
                    ui.table_row(
                        name=str(i),
                        cells=[str(values[i]) for values in column_values],
                    )
                    for i in range(len(df))
                ],
            )
        ],
    )
async def update_progress_ui(q, file_name, file_size):
    """Refresh the progress dialog from the current size of the file on disk.

    Args:
        q: Query context whose ``meta`` card holds a dialog with a progress
            item as its first item.
        file_name: Path of the partially downloaded file.
        file_size: Expected final size in bytes. Non-positive values are
            treated as "unknown" and reported as 0%.
    """
    # If your data pulling job has an api for current size or percent, use that instead!
    # We use os.path.getsize() in this tutorial to be agnostic to data connector
    file_size_dl = os.path.getsize(file_name)

    if file_size > 0:
        # Clamp so the bar never goes past 100% if more bytes land on disk than the expected size.
        status = min(100, int(file_size_dl * 100.0 // file_size))
    else:
        # Avoid dividing by zero when the total size is unknown.
        status = 0

    # Update the progress bar
    q.page["meta"].dialog.items[0].progress.caption = f"{status}% complete"
    q.page["meta"].dialog.items[0].progress.value = status / 100
    await q.page.save()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment