Skip to content

Instantly share code, notes, and snippets.

@RenSys
Last active May 3, 2019 06:34
Show Gist options
Save RenSys/c0df5fc2889ad1d92cab5ee11d64eedd to your computer and use it in GitHub Desktop.
Pandas - Dict of Periodic Dataframes
from glob import glob
import pandas as pd
# Input location: every ``*.gz`` file directly inside ``folder`` is picked up.
folder = "data"
filename_wildcard = folder + "/*.gz"

# Columns to read from each input file.  "timestamp" comes first; the rest are
# the signal columns that are kept.
tags = [
    "timestamp",
    "01_dv_01_mi_daca_pv",
    "01_dv_01_si_daca_pv",
    "01_ladder_depth_daca_pv",
    "01_mfi_051_daca_pv",
    "01_pp_51_mi_daca_pv",
    "01_sg_01_si_daca_pv",
    "01_sg_01_tact_daca_pv",
    "01_wi_14_mi_daca_pv",
    "01_wi_14_si_daca_pv",
    "01_wi_15_mi_daca_pv",
    "01_wi_15_si_daca_pv",
    "01_wi_16_si_daca_pv",
    "01_wi_17_mi_daca_pv",
    "01_wi_17_pos_daca_pv",
    "01_wi_17_si_daca_pv",
    "ctw_port_ref_sel_op",
    "ctw_selreala_out",
    "ctw_stbrd_ref_sel_op",
    "swing_lim_daca_pv",
    "01_wi_15_kw_daca_pv",
    "01_wi_16_kw_daca_pv",
]
# Collect the input files.  glob() returns paths in arbitrary,
# filesystem-dependent order, so sort them to make the concatenation order
# reproducible between runs.
_filenames = sorted(glob(filename_wildcard))
if not _filenames:
    # Without this guard pd.concat() fails with the far less helpful
    # "No objects to concatenate".  The exception type stays ValueError.
    raise ValueError(f"No input files match {filename_wildcard!r}")

# Read each file keeping only the columns listed in ``tags``, with the parsed
# "timestamp" column as the index, then stack the frames into one table.
# ``sort=True`` orders the columns when the files' column sets are not already
# aligned; ``sort_index()`` puts the rows in index order.
df = pd.concat(
    [
        pd.read_csv(fn, parse_dates=True, index_col="timestamp", usecols=tags)
        for fn in _filenames
    ],
    sort=True,
).sort_index()
# Number of calendar-week bins (pandas "W": weeks ending on Sunday) between
# the first and the last sample, empty weeks included.  Only the number of
# bins is needed, so size() is used instead of a per-column count().
num_of_weeks = len(df.groupby(pd.Grouper(freq="W")).size())

# Week-ending Sundays at midnight, beginning with the first Sunday on or after
# the day of the first sample.
_first_day = df.index[0].normalize()
_week_ends = pd.date_range(start=_first_day, freq="W", periods=num_of_weeks)

# Same effect as the former ``closed="right"`` argument of date_range(), which
# no longer exists in pandas 2.x: the start point itself is excluded, so when
# the first sample falls on a Sunday that Sunday is dropped.
if len(_week_ends) and _week_ends[0] == _first_day:
    _week_ends = _week_ends[1:]

# Each window is anchored on one of those Sundays: it opens on the Monday six
# days earlier at 06:00:00 and closes on the Monday right after the Sunday at
# 05:59:59, i.e. one second short of seven full days.
# Timedeltas are built from keywords because the upper-case unit strings
# ("6H", "59S") are deprecated in recent pandas releases.
start_dates = _week_ends - pd.Timedelta(days=6) + pd.Timedelta(hours=6)
end_dates = _week_ends + pd.Timedelta(days=1, hours=5, minutes=59, seconds=59)
# Map window number -> the rows of ``df`` inside that window (both bounds
# inclusive).  The inner generator slices each window exactly once; the slice
# is then reused for both the filter and the stored value.
# A window is kept only when its samples produce more than six daily bins,
# i.e. they span at least seven calendar days (empty days in between still
# count as bins).  Keys of rejected windows are simply absent, so the keys are
# not necessarily consecutive.
dfs = {
    idx: week
    for idx, week in (
        (idx, df.loc[start:end])
        for idx, (start, end) in enumerate(zip(start_dates, end_dates))
    )
    if len(week.groupby(pd.Grouper(freq="D")).size()) > 6
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment