Skip to content

Instantly share code, notes, and snippets.

@krassowski
Last active October 19, 2022 14:09
Show Gist options
  • Save krassowski/159bb0c76ff47edb031710e2cff6862f to your computer and use it in GitHub Desktop.
Save krassowski/159bb0c76ff47edb031710e2cff6862f to your computer and use it in GitHub Desktop.
Simple pandas DataFrame explorer for JupyterLab (using sidecar)
from pandas_explorer import pandas_explorer
from pandas import read_csv
# Example usage: load the iris dataset over HTTP and open it in the explorer.
iris_url = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv'
iris = read_csv(iris_url)
pandas_explorer(iris, title='Iris')
# Copyright (c) 2021 Michał Krassowski.
# Distributed under the terms of the Modified BSD License.
from sidecar import Sidecar
from ipywidgets import widgets
from IPython.display import display, update_display, HTML
from types import SimpleNamespace
from pandas import DataFrame, option_context
import string
import re
def pandas_explorer(data: DataFrame, title='Explorer', default_rows=30, drop_index=False):
    """Show an interactive explorer for *data* in a JupyterLab sidecar panel.

    The panel offers a text filter over rows (exact or "fuzzy"), sliders
    limiting the number of displayed rows and columns, and a sort selector.
    Text matching the filter is wrapped in ``<b>`` tags in the rendered table.

    Args:
        data: frame to explore; it is not modified (a reset-index copy is used).
        title: title passed to the ``Sidecar`` panel.
        default_rows: initial value of the "Max rows" slider.
        drop_index: passed as ``drop`` to ``DataFrame.reset_index``; when
            false the original index becomes a regular column.

    Returns:
        None. The widgets are displayed as a side effect.
    """
    # Function-scope import: needed only for escaping cell text below.
    from html import escape as html_escape

    data = data.reset_index(drop=drop_index)
    # Fuzzy queries are split on any punctuation character or a space.
    separators = re.compile('|'.join(re.escape(x) for x in string.punctuation + ' '))

    table_widgets = SimpleNamespace(
        row_filter=widgets.Text(description='Rows filter'),
        row_filter_fuzzy=widgets.Checkbox(description='Fuzzy', value=True),
        show_index=widgets.Checkbox(description='Index', value=False),
        max_rows=widgets.IntSlider(value=default_rows, description='Max rows', min=0, max=len(data)),
        max_columns=widgets.IntSlider(value=10, description='Max columns', min=0, max=len(data.columns)),
        sort_column=widgets.Dropdown(options=[None, *data.columns], description='Sort'),
        sort_ascending=widgets.Checkbox(description='Ascending')
    )
    # "Max rows" value from before a filter shrank the frame below it, so the
    # slider can be restored once enough rows are available again.
    filtered_from = None

    def split(text: str):
        """Return the non-empty pieces of *text* between separators."""
        return [piece for piece in separators.split(text) if piece]

    def search_terms(query: str, fuzzy: bool):
        """Return the list of terms that a cell must contain to match *query*."""
        terms = split(query) if fuzzy else []
        # Exact mode, or a fuzzy query made only of separators: use it verbatim.
        return terms or [query]

    def contains(value: str, terms):
        """Check whether *value* contains every term, ignoring case."""
        # todo case sensitivity option?
        lowered = value.lower()
        return all(term.lower() in lowered for term in terms)

    def highlight(value, pattern, marker='b', options=''):
        """Return HTML for *value* with every match of *pattern* wrapped in *marker*.

        All terms are matched in a single pass over the raw text, so markup
        inserted for one term can never be matched (and broken) by another.
        Text outside and inside the markers is HTML-escaped.
        """
        text = str(value)
        if pattern is None:
            return html_escape(text)
        pieces = []
        cursor = 0
        for match in pattern.finditer(text):
            pieces.append(html_escape(text[cursor:match.start()]))
            pieces.append(f'<{marker} {options}>{html_escape(match.group(0))}</{marker}>')
            cursor = match.end()
        pieces.append(html_escape(text[cursor:]))
        return ''.join(pieces)

    def hide(styler, axis, subset=None):
        """Hide an axis (or a subset of it) across pandas versions."""
        if hasattr(styler, 'hide'):
            # pandas >= 1.4; hide_index/hide_columns were removed in 2.0.
            return styler.hide(subset, axis=axis)
        if axis == 'index':
            return styler.hide_index()
        return styler.hide_columns(subset)

    def show_frame(
        row_filter: str, row_filter_fuzzy: bool,
        show_index: bool, max_rows: int, max_columns: int,
        sort_column: str, sort_ascending: bool
    ):
        """Render the frame for the current widget values (interactive callback)."""
        nonlocal filtered_from

        df = data.copy()
        terms = search_terms(row_filter, row_filter_fuzzy) if row_filter else []
        pattern = None

        if terms:
            # Longest terms first so that overlapping terms highlight greedily.
            pattern = re.compile(
                '|'.join(re.escape(term) for term in sorted(terms, key=len, reverse=True)),
                re.IGNORECASE
            )
            # Convert column-wise: row-wise apply would upcast mixed int/float
            # rows, making the searched text differ from the displayed text.
            keep = [
                any(contains(cell, terms) for cell in row)
                for row in df.astype(str).itertuples(index=False, name=None)
            ]
            # .loc (not []) so that an empty mask selects no rows, not no columns.
            df = df.loc[keep]
            if len(df) < max_rows and not filtered_from:
                filtered_from = max_rows

        def highlight_matches(value):
            return highlight(value, pattern)

        columns_to_hide = list(df.columns)[max_columns:]

        notes = []
        if max_rows < len(df):
            notes.append(f'{len(df) - max_rows} rows hidden')
        if columns_to_hide:
            notes.append(f'{len(columns_to_hide)} columns hidden')

        if sort_column is not None:
            df = df.sort_values(sort_column, ascending=sort_ascending)

        visible_rows = df.head(max_rows)
        try:
            styled = visible_rows.style
            if columns_to_hide:
                styled = hide(styled, 'columns', columns_to_hide)
            styled = styled.format(highlight_matches).set_caption(' '.join(notes))
            if not show_index:
                styled = hide(styled, 'index')
            displayed = display(styled)
        except ValueError as e:
            if 'style is not supported for non-unique indices' not in str(e):
                raise
            # Plain (unstyled) fallback: show the first max_columns columns,
            # i.e. the ones that are NOT in columns_to_hide.
            with option_context('display.max_rows', max_rows):
                displayed = display(visible_rows.iloc[:, :max_columns])

        # Keep the "Max rows" slider in sync with the filtered frame.
        if filtered_from is not None and len(df) != 0:
            current_value = min(filtered_from, len(df))
            if len(df) > filtered_from:
                filtered_from = None
            # Raise the maximum first so the restored value is accepted.
            table_widgets.max_rows.max = current_value
            table_widgets.max_rows.value = current_value
        table_widgets.max_rows.max = len(df)

        return displayed

    sc = Sidecar(title=title)
    # https://github.com/jupyter-widgets/jupyterlab-sidecar/issues/25
    sc_out = widgets.Output(layout={'overflow': 'scroll', 'max-width': '100%'})

    with sc:
        display(sc_out)

    with sc_out:
        out = widgets.interactive_output(
            show_frame,
            vars(table_widgets)
        )
        ui = widgets.VBox([
            widgets.HBox([table_widgets.row_filter, table_widgets.row_filter_fuzzy]),
            widgets.HBox([table_widgets.max_rows, table_widgets.max_columns]),
            widgets.HBox([table_widgets.sort_column, table_widgets.sort_ascending]),
        ])
        out_box = widgets.Box(
            [out],
            # sadly hard-coded to allow for scroll as sidecar has layout issues
            layout=widgets.Layout(max_width='600px', max_height='1300px')
        )
        display(
            widgets.VBox([
                ui,
                out_box
            ])
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment