@mtanco · Created July 19, 2022
H2O Wave tutorial showing how to download a "big" file and keep the user updated on progress. Downloading from a URL is used as the specific example, but you should be able to adapt this to any data pull that runs asynchronously or arrives in chunks of bytes.
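To try it locally, save the script below (assuming a filename like app.py) and start it with `wave run app`; the app is then served by the Wave server, by default at http://localhost:10101/.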
import os

import certifi
import pandas as pd
import urllib3  # urllib3==1.26.10
from h2o_wave import Q, app, handle_on, main, on, ui  # h2o_wave==0.22.0
@app("/")
async def serve(q: Q):
if not q.client.initialized:
# Create the UI for a new browser tab
q.page["meta"] = ui.meta_card("")
q.page["example"] = ui.form_card(
box="1 1 3 2",
items=[
ui.button(name="download_file", label="Download File", primary=True),
],
)
q.client.initialized = True
await handle_on(q)
await q.page.save()


@on()
async def download_file(q: Q):
    # Your data-import specifics go here - start pulling data either asynchronously or a block of bytes at a time
    url = "https://h2o-public-test-data.s3.amazonaws.com/cc_fraud.csv"
    http = urllib3.PoolManager(cert_reqs="CERT_REQUIRED", ca_certs=certifi.where())
    r = http.request("GET", url, preload_content=False)  # preload_content=False streams the body instead of loading it all at once
    block_sz = 8192

    # Information we want to get from your data-import tool
    file_name = url.split("/")[-1]
    file_size = int(r.headers["Content-Length"])

    q.page["meta"].dialog = ui.dialog(
        title=f"Downloading: {file_name} Bytes: {file_size}",
        blocking=True,
        items=[
            ui.progress(
                label="",
                caption="0% complete",
                value=0,
            ),
        ],
    )

    count = 0
    with open(file_name, "wb") as f:
        # This loop runs until the entire file has downloaded
        while True:
            buffer = r.read(block_sz)
            if not buffer:
                # We have finished downloading the data
                q.page["meta"].dialog = None
                break
            f.write(buffer)

            # Our user will wait longer if we update the UI for every byte-block, so we only update it every
            # 250 iterations. If you are running an async job, you can replace the three uncommented lines
            # below with a progress update on a 2-second sleep timer (see the sketch after this script):
            # await update_progress_ui(q, file_name, file_size); await q.sleep(2)
            if count % 250 == 0:
                await update_progress_ui(q, file_name, file_size)
            count += 1
    r.release_conn()  # return the streaming connection to the pool

    # For this demo, confirm we have the dataset by displaying its first 10 rows
    df = pd.read_csv(file_name, nrows=10)
    q.page["file_rows"] = ui.form_card(
        box="1 3 -1 -1",
        items=[
            ui.table(
                name="my_data",
                columns=[ui.table_column(name=col, label=col) for col in df.columns.values],
                rows=[
                    ui.table_row(
                        name=str(i),
                        cells=[str(df[col].values[i]) for col in df.columns.values],
                    )
                    for i in range(len(df))
                ],
            )
        ],
    )


async def update_progress_ui(q: Q, file_name: str, file_size: int):
    # If your data-pull tool has an API for current size or percent complete, use that instead!
    # We use os.path.getsize() in this tutorial to stay agnostic to the data connector.
    file_size_dl = os.path.getsize(file_name)
    status = int(file_size_dl * 100.0 // file_size)

    # Update the progress bar
    q.page["meta"].dialog.items[0].progress.caption = f"{status}% complete"
    q.page["meta"].dialog.items[0].progress.value = status / 100
    await q.page.save()
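
The comment in the download loop mentions swapping the byte-block loop for a timer when the import runs as one asynchronous job. Below is a minimal sketch of that variant; it reuses update_progress_ui() from the script above, while start_import_job(), job.total_bytes, and job.done are hypothetical stand-ins for whatever your data-import tool provides.

@on()
async def download_file_async(q: Q):
    url = "https://h2o-public-test-data.s3.amazonaws.com/cc_fraud.csv"
    file_name = url.split("/")[-1]

    # Hypothetical helper: kicks off a background import that writes file_name
    # to disk and reports its expected size; swap in your data-import tool here
    job = start_import_job(url, file_name)
    file_size = job.total_bytes  # hypothetical attribute on the job object

    q.page["meta"].dialog = ui.dialog(
        title=f"Downloading: {file_name} Bytes: {file_size}",
        blocking=True,
        items=[ui.progress(label="", caption="0% complete", value=0)],
    )

    # Poll on a timer instead of once per byte-block; q.sleep() is Wave's
    # non-blocking sleep, so the event loop stays responsive while we wait
    while not job.done:  # hypothetical completion flag
        await update_progress_ui(q, file_name, file_size)
        await q.sleep(2)

    q.page["meta"].dialog = None
    await q.page.save()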