Created
August 6, 2023 22:06
-
-
Save hadinh1306/a56fd2e04b4bdeb1ad912af3275311ca to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from collections import defaultdict | |
| import pandas as pd | |
# Minimum "collaboration_days" a pair needs to survive the filter in
# _rank_collaborators (when keep_meaningful_collaborations is True).
MEANINGFUL_COLLABORATION_DAYS = 15
# Defaults that _assign_peer_reviewers writes into the "available_slots" and
# "available_workload" columns when the incoming frame does not have them.
INITIAL_SLOTS = 3
INITIAL_WORKLOAD = 3
# Totals from which _calculate_available_slots subtracts the assignments that
# were already made; also used there as the fill value for unseen people.
REVIEWER_WORKLOAD = 6
REVIEWER_SLOTS = 6
| def _rank_collaborators(df, keep_meaningful_collaborations=True): | |
| df_copy = df.copy() | |
| if keep_meaningful_collaborations: | |
| meaningful_collaborations_mask = df_copy["collaboration_days"] >= MEANINGFUL_COLLABORATION_DAYS | |
| df_copy = df_copy.loc[meaningful_collaborations_mask] | |
| # Rank collaborator higher if they have more collaboration days | |
| df_copy.loc[:, "reviewer_ranking"] = df_copy.groupby("employee")["collaboration_days"].rank(ascending=False) | |
| # Rank employee higher if they have more collaboration days with reviewers | |
| df_copy.loc[:, "reviewe_ranking"] = df_copy.groupby("collaborator_employee")["collaboration_days"].rank(ascending=False) | |
| # Rank reviewer-reviewee pairs higher if they rank higher in each other's list | |
| df_copy.loc[:, "sum_ranking"] = df_copy.loc[:, "reviewer_ranking"] + df_copy.loc[:, "reviewe_ranking"] | |
| df_copy.loc[:, "overall_ranking"] = df_copy["sum_ranking"].rank() | |
| df_copy = df_copy.sort_values(by="overall_ranking") | |
| return df_copy | |
| def _assign_peer_reviewers(df): | |
| reviewer_slots = defaultdict(list) | |
| reviewer_workload = defaultdict(list) | |
| df_copy = df.copy() | |
| if "available_slots" not in df_copy.columns.values: | |
| df_copy["available_slots"] = INITIAL_SLOTS | |
| if "available_workload" not in df_copy.columns.values: | |
| df_copy["available_workload"] = INITIAL_WORKLOAD | |
| for row in df_copy.itertuples(): | |
| employee = row.employee | |
| reviewer = row.collaborator_employee | |
| available_slots = row.available_slots | |
| available_workload = row.available_workload | |
| assigned_reviewer_slots = len(reviewer_slots[employee]) | |
| assigned_reviewer_workload = len(reviewer_workload[employee]) | |
| if (assigned_reviewer_slots < available_slots) & (assigned_reviewer_workload < available_workload): | |
| reviewer_slots[employee].append(reviewer) | |
| reviewer_workload[employee].append(employee) | |
| else: | |
| continue | |
| reviewer_slots_df = pd.DataFrame.from_dict(reviewer_slots, orient="index").T.unstack().dropna().reset_index(level=0) | |
| reviewer_slots_df = reviewer_slots_df.rename(columns={"level_0": "employee", 0: "collaborator_employee"}) | |
| return reviewer_slots_df | |
| def _calculate_available_slots(df, next_step_df): | |
| available_slots_df = df.groupby("employee", as_index=False)["collaborator_employee"].count() | |
| available_slots_df.loc[:, "available_slots"] = REVIEWER_SLOTS - available_slots_df.loc[:, "collaborator_employee"] | |
| available_slots_df = available_slots_df.drop(columns="collaborator_employee") | |
| available_workload_df = df.groupby("collaborator_employee", as_index=False)["employee"].count() | |
| available_workload_df.loc[:, "available_workload"] = REVIEWER_WORKLOAD - available_workload_df.loc[:, "employee"] | |
| available_workload_df = available_workload_df.drop(columns="employee") | |
| next_step_df = ( | |
| next_step_df.merge(available_slots_df, how="left", on="employee") | |
| .merge(available_workload_df, how="left", on="collaborator_employee") | |
| ) | |
| # If an employee doesn't have any reviewers, it means they still have all slots | |
| next_step_df["available_slots"] = next_step_df["available_slots"].fillna(REVIEWER_SLOTS) | |
| # If a collaborator doesn't have any reviewees, it means they still have all workload | |
| next_step_df["available_workload"] = next_step_df["available_workload"].fillna(REVIEWER_WORKLOAD) | |
| return next_step_df | |
# Build a small synthetic collaboration table to exercise the pipeline.
data = {
    "employee": [
        "Ha", "Ha", "Ha", "Ha", "Ha", "Ha",
        "Mai", "Mai", "Mai", "Mai", "Mai",
    ],
    "collaborator_employee": [
        "Minh", "Mai", "Lam", "Nguyen", "Chau", "Giang",
        "Minh", "Ha", "Lam", "Nguyen", "Chau",
    ],
    "collaboration_days": [30, 25, 10, 60, 50, 5, 60, 25, 12, 15, 1],
    "reviewer_bucket": [
        "team", "team", "team", "cross-team", "cross-team", "cross-team",
        "team", "team", "team", "cross-team", "cross-team",
    ],
}
df = pd.DataFrame(data)
team_df = df[df["reviewer_bucket"] == "team"]
cross_team_df = df[df["reviewer_bucket"] == "cross-team"]

# Round 1: reviewers from the employee's own team.
ranked_team_df = _rank_collaborators(team_df)
team_member_reviewers_df = _assign_peer_reviewers(ranked_team_df)

# Round 2: cross-team reviewers, limited by what round 1 left available.
ranked_cross_team_df = _calculate_available_slots(
    team_member_reviewers_df,
    _rank_collaborators(cross_team_df),
)
cross_team_reviewers_df = _assign_peer_reviewers(ranked_cross_team_df)

# Round 3: top up with additional team members if reviewer slots remain.
intermediate_peer_reviewers = pd.concat([team_member_reviewers_df, cross_team_reviewers_df])

# Exclude team pairs that round 1 already assigned.
reviewer_pair_keys = ["employee", "collaborator_employee"]
team_index = team_df.set_index(reviewer_pair_keys).index
team_member_reviewers_index = team_member_reviewers_df.set_index(reviewer_pair_keys).index
additional_team_members_df = _calculate_available_slots(
    intermediate_peer_reviewers,
    team_df.loc[~team_index.isin(team_member_reviewers_index)],
)
ranked_additional_team_members_df = _rank_collaborators(
    additional_team_members_df,
    keep_meaningful_collaborations=False,
)
additional_team_member_reviewers_df = _assign_peer_reviewers(ranked_additional_team_members_df)

# Combine all three rounds into the final assignment list, ordered by employee.
final_list = pd.concat(
    [
        additional_team_member_reviewers_df,
        team_member_reviewers_df,
        cross_team_reviewers_df,
    ]
).sort_values(by="employee")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment