Skip to content

Instantly share code, notes, and snippets.

@swo
Last active December 6, 2023 03:24
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save swo/6a682721c90095cbf05c76ba3bee1b67 to your computer and use it in GitHub Desktop.
Save swo/6a682721c90095cbf05c76ba3bee1b67 to your computer and use it in GitHub Desktop.
Polars cheat sheet
import polars as pl
# Making data frames ------------------------------------------

# From a dictionary that maps each column name to its values
df = pl.DataFrame(
    {
        'name': ['foo', 'bar', 'baz'],
        'bar': [0, 1, 2],
        'qux': [0.0, 1.0, 2.0],
    }
)

# From a list of named Series (one Series per column)
df = pl.DataFrame(
    [
        pl.Series('name', ['foo', 'bar', 'baz']),
        pl.Series('bar', [0, 1, 2]),
        pl.Series('qux', [0.0, 1.0, 2.0]),
    ]
)
# Selecting and renaming -----------------------------------

# Keep only the named columns; every other column is dropped
df.select('x', 'y')

# Remove one column; every other column is kept
df.drop('x')

# Rename a column with an {old: new} mapping
df.rename({'old_name': 'new_name'})

# Select and rename in one step (columns not listed are dropped):
# either alias the expression, or name it through a keyword argument
df.select(pl.col('x').alias('new_x'))
df.select(new_x=pl.col('x'))
# Mutation --------------------------------------------------

# Mutate and select (only the listed expressions are kept)
df.select(
    pl.col('name').sort(),
    pl.col('bar').sum().alias('bar_sum'),
)

# Adding a new constant column; pl.lit makes a scalar literal that is
# broadcast to every row, so it does not depend on the frame's height
df.with_columns(constant_col=pl.lit(1))
df.with_columns([pl.lit(1).alias('constant_col')])

# Simple math on one column, stored under a new name
df.with_columns(x_plus_one=pl.col('x') + 1)
df.with_columns((pl.col('x') + 1).alias('x_plus_one'))

# Overwrite an existing column: the expression keeps the name 'x', so the
# returned frame has 'x' replaced (the original df object is not modified)
df.with_columns(pl.col('x') + 1)  # column 'x' gets 1 added to it

# Simple math across columns
df.with_columns(x_plus_y=pl.col('x') + pl.col('y'))

# Arbitrary Python function of multiple columns: pack the columns into a
# struct so each row reaches the function as a dict. map_elements is the
# current name of the deprecated Expr.apply. Prefer native expressions
# (as above) whenever they can express the computation.
df.with_columns(
    pl.struct(['x', 'y'])
    .map_elements(lambda row: row['x'] + row['y'])
    .alias('x_plus_y')
)

# Mutate across multiple columns at once
df.select(pl.col('x', 'y') + 1)
import polars.selectors as cs
df.with_columns(cs.by_name(['x', 'y']) + 1)
# Grouped mutation ------------------------------------------

# Aggregate results, reducing the number of rows to one per group
df.group_by('group').agg(sum_by_group=pl.col('x').sum())

# Do a mutation within a group without aggregating (every row is kept).
# cum_sum is the current name of the renamed Expr.cumsum.
df.with_columns(cumsum_x_by_group=pl.col('x').cum_sum().over('group'))

# Counting rows per group
# NOTE(review): newer Polars releases deprecate GroupBy.count() in favour of
# .len(), whose result column is named 'len' rather than 'count'; switch once
# the minimum supported Polars version is confirmed.
df.group_by('group').count()
# Printing ------------------------------------------------

# Print all rows: a negative row limit tells Polars to show every row.
# The setting is made on a Config used as a context manager, so it is
# scoped to the with-block rather than set globally.
with pl.Config() as cfg:
    cfg.set_tbl_rows(-1)
    print(df)
def print_all(df):
    """Print *df* with the table row limit set to -1 (show all rows).

    The row limit is changed on a ``pl.Config`` used as a context manager,
    so the setting is scoped to this call.

    Args:
        df: The object to print (a Polars DataFrame in the examples here).

    Returns:
        None. The function only prints.
    """
    with pl.Config() as cfg:
        cfg.set_tbl_rows(-1)
        print(df)
# Call the helper through pipe so it can sit at the end of a method chain
df.pipe(print_all)
# The same approach works for showing all columns: use set_tbl_cols
# in place of set_tbl_rows
# Dates ---------------------------------------------------

# Round down to the prior Sunday: shift forward one day, truncate to the
# start of the week, then shift back one day. The +1d/-1d pair moves the
# truncation boundary one day earlier than truncate("1w") alone would give.
(
    x.dt.offset_by("1d")
    .dt.truncate("1w")
    .dt.offset_by("-1d")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment