Skip to content

Instantly share code, notes, and snippets.

@ritchie46
Created November 19, 2021 11:00
Show Gist options
  • Save ritchie46/aba83e13133e39c09a42e2aca5220548 to your computer and use it in GitHub Desktop.
Save ritchie46/aba83e13133e39c09a42e2aca5220548 to your computer and use it in GitHub Desktop.
answering a question
size = 10 ** 2

# Demo frame: ten consecutive rows share one group id, and every row carries
# a three-element vector built from its index plus the index itself.
df = pl.DataFrame({
    # Integer floor division keeps the ids exact ints; no float multiply and
    # no separate floor() call are needed.
    "groupid": [i // 10 for i in range(size)],
    "vectors": [[i, i + 1, i - 1] for i in range(size)],
    "numbers": list(range(size)),
})
print(df)
# Outputs
# shape: (100, 3)
# ┌─────────┬───────────────┬─────────┐
# │ groupid ┆ vectors ┆ numbers │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ list [i64] ┆ i64 │
# ╞═════════╪═══════════════╪═════════╡
# │ 0 ┆ [0, 1, -1] ┆ 0 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 0 ┆ [1, 2, 0] ┆ 1 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 0 ┆ [2, 3, 1] ┆ 2 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 0 ┆ [3, 4, 2] ┆ 3 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ ... ┆ ... ┆ ... │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 9 ┆ [95, 96, 94] ┆ 95 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 9 ┆ [96, 97, 95] ┆ 96 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 9 ┆ [97, 98, 96] ┆ 97 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 9 ┆ [98, 99, 97] ┆ 98 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 9 ┆ [99, 100, 98] ┆ 99 │
# └─────────┴───────────────┴─────────┘
# Fallback result, built a single time up front so the aggregation callback
# does not allocate a fresh zero vector on every call.
DEFAULT = pl.Series(np.zeros(768))


# A polars custom function must hand back a Series or a number when it is
# used in groupby -> apply.
def my_agg(s):
    """Average the rows of ``s`` element-wise and return them as a Series.

    ``s`` is converted to a NumPy array and reduced with ``mean(axis=0)``.
    When ``s`` has no elements the shared ``DEFAULT`` Series is returned
    instead (the same object every time, not a copy).
    """
    if len(s) > 0:
        stacked = s.to_numpy()
        return pl.Series(stacked.mean(axis=0))
    return DEFAULT
# One aggregation expression per output column; each alias is the column
# name that appears in the printed result.
aggregations = [
    # custom Python aggregation applied to the "vectors" column
    pl.col("vectors").apply(my_agg).alias("aggregated_vectors"),
    # plain mean of "numbers"
    pl.mean("numbers").alias("mean_of_numbers_1"),
    # mean of "numbers" restricted to the values greater than 20
    pl.col("numbers")
    .filter(pl.col("numbers") > 20)
    .mean()
    .alias("mean_of_numbers > 20"),
]

grouped = df.groupby("groupid")
out = grouped.agg(aggregations)
print(out)
# shape: (10, 4)
# ┌─────────┬────────────────────┬───────────────────┬──────────────────────┐
# │ groupid ┆ aggregated_vectors ┆ mean_of_numbers_1 ┆ mean_of_numbers > 20 │
# │ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ list [i64] ┆ f64 ┆ f64 │
# ╞═════════╪════════════════════╪═══════════════════╪══════════════════════╡
# │ 0 ┆ [4, 5, 3] ┆ 4.5 ┆ null │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 4 ┆ [44, 45, 43] ┆ 44.5 ┆ 44.5 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 2 ┆ [24, 25, 23] ┆ 24.5 ┆ 25 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 5 ┆ [54, 55, 53] ┆ 54.5 ┆ 54.5 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ ... ┆ ... ┆ ... ┆ ... │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 9 ┆ [94, 95, 93] ┆ 94.5 ┆ 94.5 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 8 ┆ [84, 85, 83] ┆ 84.5 ┆ 84.5 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 6 ┆ [64, 65, 63] ┆ 64.5 ┆ 64.5 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 3 ┆ [34, 35, 33] ┆ 34.5 ┆ 34.5 │
# ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 7 ┆ [74, 75, 73] ┆ 74.5 ┆ 74.5 │
# └─────────┴────────────────────┴───────────────────┴──────────────────────┘
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment