Skip to content

Instantly share code, notes, and snippets.

@braaannigan
Last active September 15, 2022 15:12
Show Gist options
  • Save braaannigan/5acfd60b94be300ca6ed599a9723d922 to your computer and use it in GitHub Desktop.
Save braaannigan/5acfd60b94be300ca6ed599a9723d922 to your computer and use it in GitHub Desktop.
from datetime import timedelta, datetime
import polars as pl
# Make a dataframe with a one hour time series at 10-min intervals for group "a"
# Bounds of the series: a one-hour window. The date itself is arbitrary;
# only the one-hour span matters for the example.
start = datetime(2022, 1, 1, 0, 0)
stop = start + timedelta(hours=1)
df = (
    pl.DataFrame(
        {
            # NOTE(review): assumes date_range includes both endpoints by
            # default (00:00, 00:10, ..., 01:00) -- confirm for the installed
            # polars version.
            'date': pl.date_range(start, stop, timedelta(minutes=10)),
        }
    )
    .with_column(pl.lit('a').alias('group_id'))
    # Add a row count column for some simple values
    .with_row_count('value')
    # Remove the value at 20 minutes past the hour, leaving a gap in the
    # series that the upsampling step further down has to fill back in
    .filter(pl.col('value') != 2)
)
# Build a second group "b" from the same values and dates as `df`, then stack
# it underneath the original rows so `df2` holds both groups.
group_b = df.select(['value', 'date']).with_column(pl.lit('b').alias('group_id'))
df2 = pl.concat([df, group_b])
# Repair the time series one group at a time: upsample each group's frame to
# a 10-minute grid on 'date', interpolate, then forward-fill whatever is
# still null.
# NOTE(review): presumably interpolate() fills the numeric 'value' gap and the
# forward fill restores 'group_id' on the inserted rows -- confirm.
# NOTE(review): this assumes iterating a groupby yields one DataFrame per
# group; newer polars releases yield (key, DataFrame) tuples -- confirm
# against the installed version.
dfList = [
    sub_df.upsample('date', '10m').interpolate().fill_null(strategy='forward')
    for sub_df in df2.groupby("group_id")
]
# Stitch the per-group results back into a single frame
correctedDf = pl.concat(dfList)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment