RenSys/Pandas - Periodic Dataframes

## Pandas - Periodic Dataframes
from glob import glob
import pandas as pd

# Build one DataFrame from every gzipped CSV in `folder`, then cut it into
# week-long windows stored in `dfs` (keyed by the window's position).

folder = "data"
filename_wildcard = f"{folder}/*.gz"

# Columns read from each file; "timestamp" is used as the index below.
tags = [
    "timestamp",
    "01_dv_01_mi_daca_pv",
    "01_dv_01_si_daca_pv",
    "01_ladder_depth_daca_pv",
    "01_mfi_051_daca_pv",
    "01_pp_51_mi_daca_pv",
    "01_sg_01_si_daca_pv",
    "01_sg_01_tact_daca_pv",
    "01_wi_14_mi_daca_pv",
    "01_wi_14_si_daca_pv",
    "01_wi_15_mi_daca_pv",
    "01_wi_15_si_daca_pv",
    "01_wi_16_si_daca_pv",
    "01_wi_17_mi_daca_pv",
    "01_wi_17_pos_daca_pv",
    "01_wi_17_si_daca_pv",
    "ctw_port_ref_sel_op",
    "ctw_selreala_out",
    "ctw_stbrd_ref_sel_op",
    "swing_lim_daca_pv",
    "01_wi_15_kw_daca_pv",
    "01_wi_16_kw_daca_pv",
]

# The files are concatenated in whatever order glob() yields them, so the
# combined frame is sorted by its timestamp index afterwards.
df = pd.concat(
    [
        pd.read_csv(fn, parse_dates=True, index_col="timestamp", usecols=tags)
        for fn in glob(filename_wildcard)
    ],
    sort=True,
).sort_index()

# Number of weekly bins ("W" is anchored on Sundays) spanned by the data.
num_of_weeks = len(df.groupby(pd.Grouper(freq="W")).count())

# Midnight week anchors, computed once and shared by both boundaries.
# `inclusive="right"` replaces the `closed="right"` keyword, which was
# removed in pandas 2.0 (`inclusive` requires pandas >= 1.4).
week_ends = pd.date_range(
    start=df.index[0],
    normalize=True,
    freq="1W",
    periods=num_of_weeks,
    inclusive="right",
)

# Each window opens at 06:00 on the day after the previous week anchor and
# closes at 05:59:59 on the day after its own anchor, i.e. one second before
# the next window opens. Keyword-based Timedeltas avoid the unit strings
# "H"/"S", which are deprecated in recent pandas.
start_dates = week_ends - pd.Timedelta(weeks=1) + pd.Timedelta(days=1, hours=6)
end_dates = week_ends + pd.Timedelta(days=1, hours=5, minutes=59, seconds=59)

# Keep only the windows whose rows span more than six daily bins. Every
# window is sliced once and reused for both the check and the stored value.
dfs = {
    idx: window
    for idx, window in enumerate(
        df.loc[start:end] for start, end in zip(start_dates, end_dates)
    )
    if len(window.groupby(pd.Grouper(freq="D")).count()) > 6
}
	from glob import glob
	import pandas as pd

	# Build one DataFrame from every gzipped CSV in `folder`, then cut it into
	# week-long windows stored in `dfs` (keyed by the window's position).

	folder = "data"
	filename_wildcard = f"{folder}/*.gz"

	# Columns read from each file; "timestamp" is used as the index below.
	tags = [
	    "timestamp",
	    "01_dv_01_mi_daca_pv",
	    "01_dv_01_si_daca_pv",
	    "01_ladder_depth_daca_pv",
	    "01_mfi_051_daca_pv",
	    "01_pp_51_mi_daca_pv",
	    "01_sg_01_si_daca_pv",
	    "01_sg_01_tact_daca_pv",
	    "01_wi_14_mi_daca_pv",
	    "01_wi_14_si_daca_pv",
	    "01_wi_15_mi_daca_pv",
	    "01_wi_15_si_daca_pv",
	    "01_wi_16_si_daca_pv",
	    "01_wi_17_mi_daca_pv",
	    "01_wi_17_pos_daca_pv",
	    "01_wi_17_si_daca_pv",
	    "ctw_port_ref_sel_op",
	    "ctw_selreala_out",
	    "ctw_stbrd_ref_sel_op",
	    "swing_lim_daca_pv",
	    "01_wi_15_kw_daca_pv",
	    "01_wi_16_kw_daca_pv",
	]

	# The files are concatenated in whatever order glob() yields them, so the
	# combined frame is sorted by its timestamp index afterwards.
	df = pd.concat(
	    [
	        pd.read_csv(fn, parse_dates=True, index_col="timestamp", usecols=tags)
	        for fn in glob(filename_wildcard)
	    ],
	    sort=True,
	).sort_index()

	# Number of weekly bins ("W" is anchored on Sundays) spanned by the data.
	num_of_weeks = len(df.groupby(pd.Grouper(freq="W")).count())

	# Midnight week anchors, computed once and shared by both boundaries.
	# `inclusive="right"` replaces the `closed="right"` keyword, which was
	# removed in pandas 2.0 (`inclusive` requires pandas >= 1.4).
	week_ends = pd.date_range(
	    start=df.index[0],
	    normalize=True,
	    freq="1W",
	    periods=num_of_weeks,
	    inclusive="right",
	)

	# Each window opens at 06:00 on the day after the previous week anchor and
	# closes at 05:59:59 on the day after its own anchor, i.e. one second before
	# the next window opens. Keyword-based Timedeltas avoid the unit strings
	# "H"/"S", which are deprecated in recent pandas.
	start_dates = week_ends - pd.Timedelta(weeks=1) + pd.Timedelta(days=1, hours=6)
	end_dates = week_ends + pd.Timedelta(days=1, hours=5, minutes=59, seconds=59)

	# Keep only the windows whose rows span more than six daily bins. Every
	# window is sliced once and reused for both the check and the stored value.
	dfs = {
	    idx: window
	    for idx, window in enumerate(
	        df.loc[start:end] for start, end in zip(start_dates, end_dates)
	    )
	    if len(window.groupby(pd.Grouper(freq="D")).count()) > 6
	}