Skip to content

Instantly share code, notes, and snippets.

@moust
Created April 23, 2024 14:48
Show Gist options
  • Save moust/7ce10b15d5cab93fe5bb9a0e99eeff4a to your computer and use it in GitHub Desktop.
Save moust/7ce10b15d5cab93fe5bb9a0e99eeff4a to your computer and use it in GitHub Desktop.
import polars as pl
@pl.api.register_expr_namespace("preprocessing")
class Preprocessing:
    """Rescaling helpers attached to polars expressions.

    The class is registered under the name ``preprocessing``, so its
    methods are reached as ``some_expr.preprocessing.scale()`` and
    ``some_expr.preprocessing.normalize()``.

    Neither method guards against a zero denominator: a column whose
    values are all equal has ``max - min == 0`` and a zero standard
    deviation.
    NOTE(review): polars is expected to yield NaN/null rather than raise
    in that case -- confirm before relying on it.
    """

    def __init__(self, expr: pl.Expr) -> None:
        # The expression the namespace was accessed on.
        self._expr = expr

    def scale(self) -> pl.Expr:
        """Return the min-max rescaling ``(x - min) / (max - min)``.

        The column minimum maps to 0 and the column maximum maps to 1.
        """
        lowest = self._expr.min()
        return (self._expr - lowest) / (self._expr.max() - lowest)

    def normalize(self) -> pl.Expr:
        """Return the standard score ``(x - mean) / std``.

        ``Expr.std()`` is called with its default arguments.
        """
        return (self._expr - self._expr.mean()) / self._expr.std()
# Demo: apply both namespace methods to every column of a tiny
# single-column frame, labelling each result with its own alias.
_demo_exprs = [
    pl.all().preprocessing.scale().alias("scaled"),
    pl.all().preprocessing.normalize().alias("normalized"),
]
pl.DataFrame(data=[1, 2, 3]).select(_demo_exprs)
# shape: (3, 1)         shape: (3, 2)
# ┌──────────┐          ┌────────┬────────────┐
# │ column_0 │          │ scaled ┆ normalized │
# │ ---      │          │ ---    ┆ ---        │
# │ i64      │          │ f64    ┆ f64        │
# ╞══════════╡    >>    ╞════════╪════════════╡
# │ 1        │          │ 0.0    ┆ -1.0       │
# │ 2        │          │ 0.5    ┆ 0.0        │
# │ 3        │          │ 1.0    ┆ 1.0        │
# └──────────┘          └────────┴────────────┘
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment