Skip to content

Instantly share code, notes, and snippets.

@royerk
Last active October 16, 2023 01:33
Show Gist options
  • Save royerk/9eab520bf1b544b1fc2afdb811a861df to your computer and use it in GitHub Desktop.
Save royerk/9eab520bf1b544b1fc2afdb811a861df to your computer and use it in GitHub Desktop.
AB infer from log
import pandas as pd
# Sample participation log, one entry per (date, device) that reported in.
df = pd.DataFrame(
    {
        "date": [
            "2023-01-01",
            "2023-01-01",
            "2023-01-02",
            "2023-01-02",
            "2023-01-03",
            "2023-01-04",
            "2023-01-05",
        ],
        "device_id": [
            "device_1",
            "device_2",
            "device_1",
            "device_3",
            "device_2",
            "device_1",
            "device_2",
        ],
        "platoon": [800, 900, 800, 900, 900, 999, 900],
        "participated": [True] * 7,
    }
)
# Gaps in the log that the inference has to fill:
# - device_1 has no row on 2023-01-03 or 2023-01-05
# - device_2 has no row on 2023-01-02 or 2023-01-04
# - device_3 first appears on 2023-01-02, so 2023-01-01 gets the "missing" platoon
# - device_1 moves from platoon 800 to 999 on 2023-01-04, so its 2023-01-05 gap
#   is filled with the baseline of platoon 999
print(df)
def update_mapping(df: pd.DataFrame, platoon_mapping: dict = None) -> dict:
    """
    Fill in a baseline platoon for every platoon found in ``df``.

    Platoons that already have a non-None entry keep it; every other platoon
    is mapped to ``f"{platoon}b"``. A mapping that is passed in is updated in
    place and the same object is returned.

    :param df: DataFrame with a ``platoon`` column used to detect platoons
    :param platoon_mapping: dictionary of platoon mapping, e.g. {800: 820}, can be partial;
        a new dictionary is created when it is omitted or None
    :return: the updated platoon mapping
    """
    # The default is None, so a dictionary has to be created before it is used.
    if platoon_mapping is None:
        platoon_mapping = {}
    for platoon in df["platoon"].unique():
        # An entry explicitly set to None counts as "not mapped yet".
        if platoon_mapping.get(platoon) is None:
            platoon_mapping[platoon] = f"{platoon}b"
    return platoon_mapping
def infer_device_participation(
    device_id: int,
    device_df: pd.DataFrame,
    platoon_mapping: dict,
    missing_platoon: str,
    date_range: pd.DatetimeIndex,
) -> pd.DataFrame:
    """
    Infer days where a device did not participate and assign a baseline platoon.

    For every date in ``date_range`` that has no row in ``device_df``, a row with
    ``participated=False`` is added. Its platoon is the mapped value of the most
    recent platoon seen for the device, or of ``missing_platoon`` when no platoon
    has been seen yet. ``device_df`` itself is not modified.

    :param device_id: device id, copied as-is into the ``device_id`` column of added rows
    :param device_df: DataFrame with columns: date, device_id, platoon, participated
    :param platoon_mapping: dictionary of platoon mapping, e.g. {800: 820}; it must
        contain every platoon of ``device_df`` as well as ``missing_platoon``
    :param missing_platoon: string to use for missing platoon
    :param date_range: date range to infer participation
    :return: the rows of ``device_df`` followed by the inferred rows in date order;
        index labels may repeat, so callers should reset the index if they rely on it
    :raises KeyError: if a platoon needed for an inferred row is not in ``platoon_mapping``
    """
    # Build the date -> platoon lookup once instead of scanning the frame for
    # every date. When a date occurs several times, the first row wins.
    platoon_by_date = {}
    for seen_date, platoon in zip(device_df["date"], device_df["platoon"]):
        platoon_by_date.setdefault(seen_date, platoon)

    # until we identify the first platoon, we assume it is missing
    last_platoon = missing_platoon
    inferred_rows = []
    for date in date_range:
        if date in platoon_by_date:
            # the device participated: remember its platoon for later gaps
            last_platoon = platoon_by_date[date]
            continue
        # the device did not participate: fill the gap with the baseline platoon
        inferred_rows.append(
            {
                "date": date,
                "device_id": device_id,
                "platoon": platoon_mapping[last_platoon],
                "participated": False,
            }
        )

    if not inferred_rows:
        return device_df
    # A single concat at the end avoids re-copying the growing frame per gap.
    return pd.concat([device_df, pd.DataFrame(inferred_rows)])
def infer_participation(df: pd.DataFrame, platoon_mapping: dict = None) -> pd.DataFrame:
    """
    Infer days where a device did not participate in a platoon,
    and fill in the missing days with the device associated with a new platoon (or a platoon mapping).

    Neither ``df`` nor ``platoon_mapping`` is modified; the function works on copies.

    :param df: DataFrame with columns: date, device_id, platoon, participated
    :param platoon_mapping: dictionary of platoon mapping, e.g. {"800": "820"}, can be partial.
        Keys are matched against platoons after they have been converted to strings.
        Defaults to an empty mapping.
    :return: DataFrame with columns: date, device_id, platoon, participated,
        one row per device and per day between the first and last date of ``df``,
        sorted by date and device_id
    """
    # Work on a copy so the caller's frame keeps its original dtypes.
    df = df.copy()
    # convert date to datetime and platoon to string
    df["date"] = pd.to_datetime(df["date"])
    df["platoon"] = df["platoon"].astype(str)

    # Nothing to infer, and min()/max() of an empty column cannot build a date range.
    if df.empty:
        return df.reset_index(drop=True)

    # A fresh dict per call: a shared default or the caller's dict must not
    # accumulate the entries added below.
    platoon_mapping = dict(platoon_mapping) if platoon_mapping is not None else {}
    # update platoon mapping with platoon detected in df
    platoon_mapping = update_mapping(df, platoon_mapping)
    # add missing platoon to platoon mapping
    missing_platoon = "missing"
    platoon_mapping[missing_platoon] = missing_platoon

    # create date range from min to max date
    date_range = pd.date_range(start=df["date"].min(), end=df["date"].max())

    result_list = [
        infer_device_participation(
            device_id=device_id,
            device_df=df[df["device_id"] == device_id],
            platoon_mapping=platoon_mapping,
            missing_platoon=missing_platoon,
            date_range=date_range,
        )
        for device_id in df["device_id"].unique()
    ]

    # concatenate all device data and sort by date and device_id
    result_df = pd.concat(result_list)
    return result_df.sort_values(by=["date", "device_id"]).reset_index(drop=True)
# Demo run: platoon "800" uses "820" as its baseline; platoons without an
# explicit entry get a baseline generated by update_mapping.
baseline_overrides = {"800": "820"}
demo_result = infer_participation(df, platoon_mapping=baseline_overrides)
print(demo_result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment