Skip to content

Instantly share code, notes, and snippets.

@hadinh1306
Created August 6, 2023 22:06
Show Gist options
  • Save hadinh1306/a56fd2e04b4bdeb1ad912af3275311ca to your computer and use it in GitHub Desktop.
Save hadinh1306/a56fd2e04b4bdeb1ad912af3275311ca to your computer and use it in GitHub Desktop.
from collections import defaultdict
import pandas as pd
# Minimum value of "collaboration_days" a pair needs to survive the
# "meaningful collaborations" filter in _rank_collaborators.
MEANINGFUL_COLLABORATION_DAYS = 15
# Value _assign_peer_reviewers writes into "available_slots" when the input
# frame has no such column.
INITIAL_SLOTS = 3
# Value _assign_peer_reviewers writes into "available_workload" when the input
# frame has no such column.
INITIAL_WORKLOAD = 3
# Total from which _calculate_available_slots subtracts the number of employees
# already listed against a collaborator; also the fill value for collaborators
# that have no rows yet.
REVIEWER_WORKLOAD = 6
# Total from which _calculate_available_slots subtracts the number of
# collaborators already listed against an employee; also the fill value for
# employees that have no rows yet.
REVIEWER_SLOTS = 6
def _rank_collaborators(df, keep_meaningful_collaborations=True, min_collaboration_days=None):
    """Rank employee/collaborator pairs by mutual collaboration strength.

    Every pair is ranked twice on ``collaboration_days`` (descending): once
    among the rows that share its ``employee`` and once among the rows that
    share its ``collaborator_employee``. The two ranks are added and the sum is
    ranked again (ascending) to give ``overall_ranking``; the smallest value is
    the strongest mutual pair. Ties get averaged ranks (pandas' default).

    Args:
        df: DataFrame with ``employee``, ``collaborator_employee`` and
            ``collaboration_days`` columns. It is not modified.
        keep_meaningful_collaborations: When True, rows below the threshold
            number of collaboration days are dropped before ranking.
        min_collaboration_days: Threshold used by that filter. ``None`` (the
            default) means the module-level ``MEANINGFUL_COLLABORATION_DAYS``.

    Returns:
        A new DataFrame holding the (possibly filtered) input rows plus the
        columns ``reviewer_ranking``, ``reviewe_ranking``, ``sum_ranking`` and
        ``overall_ranking``, sorted by ``overall_ranking``. Rows with equal
        ``overall_ranking`` keep their input order.
    """
    if keep_meaningful_collaborations:
        if min_collaboration_days is None:
            min_collaboration_days = MEANINGFUL_COLLABORATION_DAYS
        # Filter first and copy only the surviving rows: the explicit copy means
        # the column writes below never target a slice of the caller's frame.
        ranked = df.loc[df["collaboration_days"] >= min_collaboration_days].copy()
    else:
        ranked = df.copy()

    # Rank collaborator higher if they have more collaboration days
    ranked["reviewer_ranking"] = ranked.groupby("employee")["collaboration_days"].rank(ascending=False)
    # Rank employee higher if they have more collaboration days with reviewers.
    # NOTE: the column name is misspelled ("reviewe"), but it is part of the
    # returned schema, so it is kept as-is.
    ranked["reviewe_ranking"] = ranked.groupby("collaborator_employee")["collaboration_days"].rank(ascending=False)
    # Rank reviewer-reviewee pairs higher if they rank higher in each other's list
    ranked["sum_ranking"] = ranked["reviewer_ranking"] + ranked["reviewe_ranking"]
    ranked["overall_ranking"] = ranked["sum_ranking"].rank()
    # A stable sort keeps tied pairs in a deterministic (input) order, which
    # matters because downstream assignment is order-dependent.
    return ranked.sort_values(by="overall_ranking", kind="mergesort")
def _assign_peer_reviewers(df):
    """Greedily assign reviewers to employees under two capacity limits.

    Rows are visited in their existing order (the callers in this file pass a
    frame already sorted by ``overall_ranking``). A pair is accepted only while

    * the employee has received fewer reviewers in this call than the row's
      ``available_slots``, and
    * the reviewer has received fewer reviewees in this call than the row's
      ``available_workload``.

    Args:
        df: DataFrame with ``employee`` and ``collaborator_employee`` columns
            and, optionally, ``available_slots`` / ``available_workload``. A
            missing capacity column is filled with ``INITIAL_SLOTS`` /
            ``INITIAL_WORKLOAD``. The input is not modified.

    Returns:
        DataFrame with the columns ``employee`` and ``collaborator_employee``,
        one row per accepted pair, grouped by employee in first-seen order. The
        index is the reviewer's position within that employee's list, so index
        values repeat across employees. Accepted rows whose reviewer is missing
        (NaN/None) are dropped from the result. When nothing is assigned, an
        empty frame that still has both columns is returned.
    """
    candidates = df.copy()
    if "available_slots" not in candidates.columns:
        candidates["available_slots"] = INITIAL_SLOTS
    if "available_workload" not in candidates.columns:
        candidates["available_workload"] = INITIAL_WORKLOAD

    reviewers_by_employee = defaultdict(list)
    reviewees_by_reviewer = defaultdict(list)
    for row in candidates.itertuples():
        employee = row.employee
        reviewer = row.collaborator_employee
        has_free_slot = len(reviewers_by_employee[employee]) < row.available_slots
        # Workload is a limit on the *reviewer*, so it has to be counted per
        # reviewer; counting it per employee would merely duplicate the slot
        # check and let one reviewer be assigned without bound.
        has_free_workload = len(reviewees_by_reviewer[reviewer]) < row.available_workload
        if has_free_slot and has_free_workload:
            reviewers_by_employee[employee].append(reviewer)
            reviewees_by_reviewer[reviewer].append(employee)

    # Build the long-format result directly so the two columns exist even when
    # no pair was accepted.
    pairs = [
        (employee, reviewer)
        for employee, reviewers in reviewers_by_employee.items()
        for reviewer in reviewers
    ]
    positions = [
        position
        for reviewers in reviewers_by_employee.values()
        for position in range(len(reviewers))
    ]
    reviewer_slots_df = pd.DataFrame(
        pairs, columns=["employee", "collaborator_employee"], index=positions
    )
    return reviewer_slots_df.dropna(subset=["collaborator_employee"])
def _calculate_available_slots(df, next_step_df):
    """Attach remaining reviewer capacity to the next round's candidate pairs.

    Args:
        df: Pairs assigned so far, with ``employee`` and
            ``collaborator_employee`` columns.
        next_step_df: Candidate pairs for the next round, with the same two
            key columns.

    Returns:
        ``next_step_df`` left-merged with two extra columns:
        ``available_slots`` (``REVIEWER_SLOTS`` minus the number of
        collaborators ``df`` lists for the row's employee) and
        ``available_workload`` (``REVIEWER_WORKLOAD`` minus the number of
        employees ``df`` lists for the row's collaborator). Keys that do not
        occur in ``df`` get the full ``REVIEWER_SLOTS`` / ``REVIEWER_WORKLOAD``.
    """
    # Non-null collaborators already recorded per employee.
    reviewers_per_employee = df.groupby("employee")["collaborator_employee"].count()
    slots_left = (REVIEWER_SLOTS - reviewers_per_employee).rename("available_slots").reset_index()

    # Non-null employees already recorded per collaborator.
    reviewees_per_reviewer = df.groupby("collaborator_employee")["employee"].count()
    workload_left = (REVIEWER_WORKLOAD - reviewees_per_reviewer).rename("available_workload").reset_index()

    with_capacity = next_step_df.merge(slots_left, how="left", on="employee")
    with_capacity = with_capacity.merge(workload_left, how="left", on="collaborator_employee")

    # A key with no match in df has used nothing yet, so it keeps full capacity.
    full_capacity = (
        ("available_slots", REVIEWER_SLOTS),
        ("available_workload", REVIEWER_WORKLOAD),
    )
    for column, capacity in full_capacity:
        with_capacity[column] = with_capacity[column].fillna(capacity)
    return with_capacity
# Toy data set: one row per (employee, collaborator) pair.
data = {
    "employee": ["Ha", "Ha", "Ha", "Ha", "Ha", "Ha", "Mai", "Mai", "Mai", "Mai", "Mai"],
    "collaborator_employee": [
        "Minh", "Mai", "Lam", "Nguyen", "Chau", "Giang",
        "Minh", "Ha", "Lam", "Nguyen", "Chau",
    ],
    "collaboration_days": [30, 25, 10, 60, 50, 5, 60, 25, 12, 15, 1],
    "reviewer_bucket": [
        "team", "team", "team", "cross-team", "cross-team", "cross-team",
        "team", "team", "team", "cross-team", "cross-team",
    ],
}
df = pd.DataFrame(data)
team_df = df[df["reviewer_bucket"] == "team"]
cross_team_df = df[df["reviewer_bucket"] == "cross-team"]

# Round 1: team members (meaningful collaborations only).
ranked_team_df = _rank_collaborators(team_df)
team_member_reviewers_df = _assign_peer_reviewers(ranked_team_df)

# Round 2: cross-team collaborators, capped by the capacity round 1 left over.
ranked_cross_team_df = _calculate_available_slots(
    team_member_reviewers_df,
    _rank_collaborators(cross_team_df),
)
cross_team_reviewers_df = _assign_peer_reviewers(ranked_cross_team_df)

# Round 3: top up with further team members while capacity remains; this round
# ranks without the meaningful-collaboration filter.
intermediate_peer_reviewers = pd.concat([team_member_reviewers_df, cross_team_reviewers_df])
reviewer_pair_keys = ["employee", "collaborator_employee"]
team_index = pd.MultiIndex.from_frame(team_df[reviewer_pair_keys])
team_member_reviewers_index = pd.MultiIndex.from_frame(team_member_reviewers_df[reviewer_pair_keys])
# Keep only the team pairs that round 1 did not already assign.
additional_team_members_df = team_df.loc[~team_index.isin(team_member_reviewers_index)]
additional_team_members_df = _calculate_available_slots(
    intermediate_peer_reviewers,
    additional_team_members_df,
)
ranked_additional_team_members_df = _rank_collaborators(
    additional_team_members_df,
    keep_meaningful_collaborations=False,
)
additional_team_member_reviewers_df = _assign_peer_reviewers(ranked_additional_team_members_df)

# Combine all three rounds, ordered by employee.
final_list = pd.concat(
    [additional_team_member_reviewers_df, team_member_reviewers_df, cross_team_reviewers_df]
).sort_values(by="employee")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment