Skip to content

Instantly share code, notes, and snippets.

@mattijn
Created October 22, 2023 22:01
Show Gist options
  • Save mattijn/ac749df17bd5ed9c6bdec621f90096b3 to your computer and use it in GitHub Desktop.
Save mattijn/ac749df17bd5ed9c6bdec621f90096b3 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d6c12234-a766-44c8-9fb6-aae38c1afdf2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"┌──────────────┐\n",
"│ count_star() │\n",
"│ int64 │\n",
"├──────────────┤\n",
"│ 50000000 │\n",
"└──────────────┘"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import duckdb\n",
"import altair as alt\n",
"from ipywidgets import HTML, HBox\n",
"\n",
"np.random.seed(0)\n",
"no_steps = 50000000\n",
"steps = np.random.choice([-1, 1], no_steps)\n",
"random_walk = np.cumsum(steps)\n",
"\n",
"df = pd.DataFrame(random_walk, columns=['y'])\n",
"df.index.name = 'x'\n",
"df = df.reset_index()\n",
"\n",
"con = duckdb.connect()\n",
"con.register('df', df)\n",
"\n",
"con.query('SELECT count() FROM df')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "495de73b-3da1-4350-89c6-cc60a260fa7b",
"metadata": {},
"outputs": [],
"source": [
"def m4data(con, width, lowerbound, upperbound):\n",
" cur = con.cursor()\n",
" df_m4 = cur.query(f'''\n",
" SELECT \n",
" min(x) AS min_x, \n",
" arg_min(y, x) AS argmin_yx, \n",
" max(x) AS max_x, \n",
" arg_max(y, x) AS argmax_yx, \n",
" min(y) AS min_y, \n",
" arg_min(x, y) AS argmin_xy, \n",
" max(y) AS max_y, \n",
" arg_max(x, y) AS argmax_xy, \n",
" round({width} * (x - {lowerbound}) / ({upperbound} - {lowerbound})) AS bin\n",
" FROM df\n",
" WHERE x BETWEEN {lowerbound} AND {upperbound}\n",
" GROUP BY bin \n",
" ''') \n",
" \n",
" return df_m4.df()\n",
"\n",
"def chart_original(df):\n",
" x_range = [df.min_x.min(), df.max_x.max()]\n",
" interval = alt.selection_interval(encodings=['x'], bind='scales', name='interval', value={\"x\":x_range})\n",
" bind_range = alt.binding_range(min=100, max=400, name='Chart width: ')\n",
" param_width = alt.param('width', bind=bind_range, value=300)\n",
" \n",
" title=alt.Title(alt.expr(f'\"This chart is \" + {param_width.name} + \" px wide\"'))\n",
" chart = alt.Chart(df, title=title).mark_rect(tooltip=True).encode(\n",
" x='min_x:Q', \n",
" x2='max_x:Q', \n",
" y='min_y:Q', \n",
" y2='max_y:Q'\n",
" ).add_params(interval, param_width)\n",
" return chart\n",
"\n",
"def chart_updated(df, w):\n",
" chart = alt.Chart(df, width=w, title='auto chart').mark_line(\n",
" tooltip=True, \n",
" point=True,\n",
" interpolate='natural'\n",
" ).encode(\n",
" x=alt.X('min_x:Q').scale(zero=False, nice=False), \n",
" y=alt.Y('min_y:Q').scale(domain=[-6000, 12000])\n",
" )\n",
" return chart"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "79e9ab25-b64a-46ba-a1e3-bb42322e0555",
"metadata": {},
"outputs": [],
"source": [
"init_w = 300\n",
"df_reduced = m4data(con, width=init_w, lowerbound=0, upperbound=no_steps)\n",
"jchart_original = alt.JupyterChart(chart_original(df_reduced))\n",
"jchart_updated = alt.JupyterChart(chart_updated(df_reduced, w=init_w))\n",
"\n",
"#HBox([jchart_original, jchart_updated])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "10641599-3af3-4251-a8ca-3b8eacefeda8",
"metadata": {},
"outputs": [],
"source": [
"# # manual tests\n",
"# lb, ub = jchart_original.selections.interval.value['min_x']\n",
"# w = jchart_original.params.width\n",
"# print(w, lb, ub)\n",
"# df_reduced_interactive = m4data(con, width=w, lowerbound=lb, upperbound=ub)\n",
"# jchart_updated.chart = chart_updated(df_reduced_interactive, w)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5fd8c29e-a552-49f7-8725-de2ac6c75389",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4b40342f4b214bfd8cc7403fff53d324",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(JupyterChart(spec={'config': {'view': {'continuousWidth': 300, 'continuousHeight': 300}}, 'data…"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def on_width_change(width_change):\n",
" w = width_change.new\n",
" # w = jchart_original.params.width \n",
" lb, ub = jchart_original.selections.interval.value['min_x']\n",
" \n",
" df_reduced_width = m4data(con, width=w, lowerbound=int(lb), upperbound=int(ub))\n",
" print(w, lb, ub, df_reduced_width.shape)\n",
" jchart_updated.chart = chart_updated(df_reduced_width, w=w)\n",
"\n",
"def on_interval_change(interval_change):\n",
" lb, ub = interval_change.new.value['min_x']\n",
" # lb, ub = jchart_original.selections.interval.value['min_x']\n",
" w = jchart_original.params.width\n",
"\n",
" df_reduced_interval = m4data(con, width=w, lowerbound=int(lb), upperbound=int(ub))\n",
" print(w, lb, ub, df_reduced_interval.shape)\n",
" jchart_updated.chart = chart_updated(df_reduced_interval, w=w)\n",
"\n",
"jchart_original.params.observe(on_width_change, [\"width\"])\n",
"jchart_original.selections.observe(on_interval_change, [\"interval\"])\n",
"\n",
"HBox([jchart_original, jchart_updated])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52c18bd0-bd23-45e1-a7ce-653d092e045e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@Kunleiky
Copy link

Awesome gist

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment