# Last active
# October 16, 2023 01:33
# -
# -
# Save royerk/9eab520bf1b544b1fc2afdb811a861df to your computer and use it in GitHub Desktop.
# AB infer from log
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
import pandas as pd

# Sample participation log: one row per (date, device) on which the device was
# seen, together with the platoon it was logged in on that day.
df = pd.DataFrame(
    [
        ["2023-01-01", "device_1", 800, True],
        ["2023-01-01", "device_2", 900, True],
        ["2023-01-02", "device_1", 800, True],
        ["2023-01-02", "device_3", 900, True],
        ["2023-01-03", "device_2", 900, True],
        ["2023-01-04", "device_1", 999, True],
        ["2023-01-05", "device_2", 900, True],
    ],
    columns=["date", "device_id", "platoon", "participated"],
)
# Cases covered by the sample (the log spans 2023-01-01 .. 2023-01-05):
# device_1 is absent on 2023-01-03 and 2023-01-05
# device_2 is absent on 2023-01-02 and 2023-01-04
# device_3 is only seen on 2023-01-02 -> 2023-01-01 is filled with the "missing" platoon
# device_1 moves from platoon 800 to 999 on 2023-01-04 -> its 2023-01-05 gap uses the baseline of 999
print(df)
def update_mapping(df: pd.DataFrame, platoon_mapping: dict = None) -> dict:
    """
    Update platoon mapping with new platoon assignments.

    Every platoon found in ``df["platoon"]`` that has no entry in the mapping
    (or whose entry is ``None``) is mapped to a generated baseline name: the
    platoon followed by ``"b"`` (e.g. ``"800" -> "800b"``). Existing entries
    are left untouched.

    :param df: DataFrame with columns: date, device_id, platoon, participated to detect platoons
    :param platoon_mapping: dictionary of platoon mapping, e.g. {"800": "820"}, can be partial.
        When given, it is updated in place; when omitted, a new dictionary is created.
    :return: the completed platoon mapping (the same object as ``platoon_mapping`` when one was passed)
    """
    # The default is None, so create the dictionary here instead of calling .get on None.
    if platoon_mapping is None:
        platoon_mapping = {}
    for platoon in df["platoon"].unique():
        # An explicit None value counts as "not mapped yet" and is replaced.
        if platoon_mapping.get(platoon) is None:
            platoon_mapping[platoon] = f"{platoon}b"
    return platoon_mapping
def infer_device_participation(
    device_id: "int | str",
    device_df: pd.DataFrame,
    platoon_mapping: dict,
    missing_platoon: str,
    date_range: pd.DatetimeIndex,
) -> pd.DataFrame:
    """
    Infer days where a device did not participate and assign a baseline platoon.

    For every date in ``date_range`` that has no row in ``device_df``, a row with
    ``participated=False`` is added. Its platoon is ``platoon_mapping[last_platoon]``,
    where ``last_platoon`` is the platoon of the most recent earlier date on which the
    device was seen, or ``missing_platoon`` if it has not been seen yet.

    :param device_id: device id written into the inferred rows
    :param device_df: DataFrame with columns: date, device_id, platoon, participated
        (rows of a single device; ``date`` must be comparable with the entries of ``date_range``)
    :param platoon_mapping: dictionary mapping every platoon (and ``missing_platoon``) to its baseline platoon
    :param missing_platoon: string to use for missing platoon
    :param date_range: date range to infer participation
    :return: ``device_df`` followed by the inferred rows, in date order. The result is not
        sorted and its index labels may repeat, so callers should sort and reset the index.
    :raises KeyError: if a platoon needed for an inferred row has no entry in ``platoon_mapping``
    """
    # Index the log once instead of scanning the date column for every date.
    # setdefault keeps the first row's platoon when a date is logged more than once.
    platoon_by_date = {}
    for logged_date, platoon in zip(device_df["date"], device_df["platoon"]):
        platoon_by_date.setdefault(logged_date, platoon)

    # until we identify the first platoon, we assume it is missing
    last_platoon = missing_platoon
    inferred_rows = []
    for date in date_range:
        if date in platoon_by_date:
            # the device participated: remember its platoon for the following gaps
            last_platoon = platoon_by_date[date]
            continue
        # the device did not participate: add a row with the baseline of the last platoon
        inferred_rows.append(
            {
                "date": date,
                "device_id": device_id,
                "platoon": platoon_mapping[last_platoon],
                "participated": False,
            }
        )

    if not inferred_rows:
        return device_df
    # One concat at the end avoids re-copying the frame for every missing date.
    return pd.concat([device_df, pd.DataFrame(inferred_rows)])
def infer_participation(df: pd.DataFrame, platoon_mapping: dict = None) -> pd.DataFrame:
    """
    Infer days where a device did not participate in a platoon,
    and fill in the missing days with the device associated with a new platoon (or a platoon mapping).

    The date range runs from the earliest to the latest date in ``df``. A device that has
    not been seen yet on a given day is filled in with the platoon ``"missing"``.
    Neither ``df`` nor ``platoon_mapping`` is modified.

    :param df: DataFrame with columns: date, device_id, platoon, participated
    :param platoon_mapping: dictionary of platoon mapping, e.g. {"800": "820"}, can be partial.
        Platoons are compared as strings, so keys are converted with ``str`` ({800: "820"} also works).
        Platoons without an entry get a generated baseline (platoon followed by "b").
    :return: DataFrame with columns: date, device_id, platoon, participated,
        sorted by date and device_id, with ``date`` as datetime and ``platoon`` as string
    """
    # Work on a copy so the caller's frame keeps its original dtypes.
    df = df.copy()
    # convert date to datetime and platoon to string
    df["date"] = pd.to_datetime(df["date"])
    df["platoon"] = df["platoon"].astype(str)
    # Nothing to infer; date_range and concat would fail on an empty frame.
    if df.empty:
        return df.reset_index(drop=True)
    # Private copy: protects the caller's dict and avoids state shared between calls.
    # Keys are stringified because the platoon column was just cast to str.
    platoon_mapping = {str(key): value for key, value in (platoon_mapping or {}).items()}
    # update platoon mapping with platoon detected in df
    platoon_mapping = update_mapping(df, platoon_mapping)
    # add missing platoon to platoon mapping
    missing_platoon = "missing"
    platoon_mapping[missing_platoon] = missing_platoon
    # create date range from min to max date
    date_range = pd.date_range(start=df["date"].min(), end=df["date"].max())
    result_list = [
        infer_device_participation(
            device_id=device_id,
            device_df=df[df["device_id"] == device_id],
            platoon_mapping=platoon_mapping,
            missing_platoon=missing_platoon,
            date_range=date_range,
        )
        for device_id in df["device_id"].unique()
    ]
    # concatenate all device data and sort by date and device_id
    result_df = pd.concat(result_list)
    return result_df.sort_values(by=["date", "device_id"]).reset_index(drop=True)
# Demo run: platoon 800 has an explicit baseline ("820"); the other platoons get generated ones.
print(infer_participation(df, platoon_mapping={"800": "820"}))
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment