Skip to content

Instantly share code, notes, and snippets.

@mtanco
Created December 20, 2020 18:31
Show Gist options
  • Save mtanco/43f80c8c219aea52d8a708f05be21bb1 to your computer and use it in GitHub Desktop.
Save mtanco/43f80c8c219aea52d8a708f05be21bb1 to your computer and use it in GitHub Desktop.
Example of how to format a pandas dataframe for plotting with native Wave plot functions
# Plot / Dataframe
# Examples of how to format pandas data when plotting
# Use the `tolist()` method on `df.columns` and `df.values` together with Wave's `data()` function
# ---
from h2o_wave import site, data, ui, main
import pandas as pd
import numpy as np
# Handle to the '/demo' page: every card below is attached to it with
# page.add(), and page.save() at the end of the script sends the result.
page = site['/demo']
# Synthetic dataset: two random numeric features plus a categorical split
# label drawn with weights 0.8 ('Train') and 0.2 ('Test').
n = 100
df = pd.DataFrame(dict(
    length=np.random.rand(n),
    width=np.random.rand(n),
    data_type=np.random.choice(['Train', 'Test'], n, p=[0.8, 0.2]),
))
print(df.head(5))  # logging: preview the first rows
# length width data_type
# 0 0.675802 0.022420 Train
# 1 0.025449 0.527442 Train
# 2 0.236596 0.497024 Train
# 3 0.971468 0.025671 Train
# 4 0.159988 0.564333 Train
# Scatter plot: one numeric column against the other, with points colored by
# the categorical column.
scatter_mark = ui.mark(
    type='point',
    shape='circle',
    x='=length',
    y='=width',
    color='=data_type',
    x_title='Length (cm)',
    y_title='Width (cm)',
)
v = page.add(
    'scatter',
    ui.plot_card(
        box='1 1 4 4',
        title='Scatter Plot from Dataframe',
        # Column names become the fields; each dataframe row becomes a plain list
        data=data(fields=list(df.columns), rows=df.to_numpy().tolist()),
        plot=ui.plot(marks=[scatter_mark]),
    ),
)
# Average every numeric column per data_type; as_index=False keeps the group
# key as an ordinary column so it can be used as a plot field.
df_agg = df.groupby(['data_type'], as_index=False).mean()
print(df_agg.head(5))  # logging: preview the aggregated rows
# data_type length width
# 0 Test 0.438477 0.484714
# 1 Train 0.574956 0.495716
# Bar chart of the aggregated frame: one bar per data_type, bar height taken
# from the averaged 'length' column.
bar_mark = ui.mark(
    type='interval',
    x='=data_type',
    y='=length',
    x_title='Modeling Data Type',
    y_title='Length (cm)',
)
v = page.add(
    'bar',
    ui.plot_card(
        box='5 1 4 4',
        title='Bar Plot from Aggregated Dataframe',
        data=data(fields=list(df_agg.columns), rows=df_agg.to_numpy().tolist()),
        plot=ui.plot(marks=[bar_mark]),
    ),
)
# Reshape the aggregated frame from wide to tall: each (data_type, feature)
# pair becomes its own row with 'variable' and 'value' columns.
df_agg_melt = df_agg.melt(
    id_vars=['data_type'],
    value_vars=['length', 'width'],
)
print(df_agg_melt.head(5))
# data_type variable value
# 0 Test length 0.581804
# 1 Train length 0.526647
# 2 Test width 0.406644
# 3 Train width 0.477375
# Grouped bar chart from the tall frame: features along the x axis, with
# side-by-side (dodged) bars colored by data_type.
grouped_mark = ui.mark(
    type='interval',
    x='=variable',
    y='=value',
    color='=data_type',
    dodge='auto',
    x_title='Data Feature',
    y_title='cm',
)
v = page.add(
    'bar_group',
    ui.plot_card(
        box='1 5 4 4',
        title='Grouped Bar Plot from Data Frame',
        data=data(
            fields=list(df_agg_melt.columns),
            rows=df_agg_melt.to_numpy().tolist(),
        ),
        plot=ui.plot(marks=[grouped_mark]),
    ),
)

# Send all cards added above
page.save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment