Skip to content

Instantly share code, notes, and snippets.

View hadinh1306's full-sized avatar
๐Ÿ“

Ha Dinh hadinh1306

๐Ÿ“
View GitHub Profile
from collections import defaultdict
import pandas as pd
MEANINGFUL_COLLABORATION_DAYS = 15
INITIAL_SLOTS = 3
INITIAL_WORKLOAD = 3
REVIEWER_WORKLOAD = 6
REVIEWER_SLOTS = 6
# Assign additional team member reviewers if there are still reviewer slots available.
# Stack the team-member and cross-team reviewer assignments into a single frame.
intermediate_peer_reviewers = pd.concat([team_member_reviewers_df, cross_team_reviewers_df])
# Remove team member pairs that are already assigned: index both frames by the
# (employee, collaborator_employee) pair, then keep only the team_df rows whose
# pair is absent from team_member_reviewers_df.
reviewer_pair_keys = team_df[["employee", "collaborator_employee"]].columns.values.tolist()
team_index = team_df.set_index(reviewer_pair_keys).index
team_member_reviewers_index = team_member_reviewers_df.set_index(reviewer_pair_keys).index
additional_team_members_df = team_df.loc[~team_index.isin(team_member_reviewers_index)]
# Hand the assignments made so far, plus the leftover pairs, to the slot calculation.
# NOTE(review): _calculate_available_slots is only partially visible in this excerpt;
# presumably it annotates the leftover pairs with remaining slots/workload - confirm.
additional_team_members_df = _calculate_available_slots(intermediate_peer_reviewers, additional_team_members_df)
REVIEWER_WORKLOAD = 6
REVIEWER_SLOTS = 6
def _calculate_available_slots(df, next_step_df):
available_slots_df = df.groupby("employee", as_index=False)["collaborator_employee"].count()
available_slots_df.loc[:, "available_slots"] = REVIEWER_SLOTS - available_slots_df.loc[:, "collaborator_employee"]
available_slots_df = available_slots_df.drop(columns="collaborator_employee")
available_workload_df = df.groupby("collaborator_employee", as_index=False)["employee"].count()
available_workload_df.loc[:, "available_workload"] = REVIEWER_WORKLOAD - available_workload_df.loc[:, "employee"]
from collections import defaultdict
INITIAL_SLOTS = 3
INITIAL_WORKLOAD = 3
def _assign_peer_reviewers(df):
reviewer_slots = defaultdict(list)
reviewer_workload = defaultdict(list)
df_copy = df.copy()
import pandas as pd
MEANINGFUL_COLLABORATION_DAYS = 15
def _rank_collaborators(df, keep_meaningful_collaborations=True):
df_copy = df.copy()
if keep_meaningful_collaborations:
meaningful_collaborations_mask = df_copy["collaboration_days"] >= MEANINGFUL_COLLABORATION_DAYS
df_copy = df_copy.loc[meaningful_collaborations_mask]
@hadinh1306
hadinh1306 / peer_review_fake_data.py
Created August 6, 2023 21:57
peer_review_fake_data.py
import pandas as pd
# Build a fake collaboration dataset, written row-wise for readability:
# (employee, collaborator_employee, collaboration_days)
_records = [
    ('Ha', 'Minh', 30),
    ('Ha', 'Mai', 25),
    ('Ha', 'Lam', 10),
    ('Ha', 'Nguyen', 60),
    ('Ha', 'Chau', 50),
    ('Ha', 'Giang', 5),
    ('Mai', 'Minh', 60),
    ('Mai', 'Ha', 25),
    ('Mai', 'Lam', 12),
    ('Mai', 'Nguyen', 15),
    ('Mai', 'Chau', 1),
]
_column_names = ['employee', 'collaborator_employee', 'collaboration_days']
# Transpose the rows back into the column-oriented mapping the DataFrame is built from.
data = {
    column: list(values)
    for column, values in zip(_column_names, zip(*_records))
}
df = pd.DataFrame(data)
import logging
import os
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import pandas as pd
from utils import CLIENT_ID
from utils import CLIENT_SECRET
from collections.abc import Sequence
def identify_geometric_progression(sequence):
"""
Determine if a sequence is a geometric progression.
"""
assert isinstance(sequence, Sequence) & (not isinstance(sequence, str)), "Expect input to be a sequence that's not string"
assert len(sequence) > 2, "Expect a sequence with more than 2 items"
try:
ratio = sequence[1]/sequence[0]
# Bucket the `Stars` column into one-star-wide groups. Bin edges run 0..5; each bin
# is closed on the right and the lowest edge (0) is included in the first bin.
rating_bins = list(range(6))
rating_bin_labels = ['0-1', '1-2', '2-3', '3-4', '4-5']
df['RatingGroups'] = pd.cut(
    df['Stars'],
    bins=rating_bins,
    labels=rating_bin_labels,
    right=True,
    include_lowest=True,
)
# Flag rows whose `Stars` value is one of the known non-numeric / mixed-scale strings.
string_star_mask = df['Stars'].isin(['Unrated', 'NR', '1/4', '1/2', '1/3',
                                     '3.5/2.5', '4/4', '5/5', '4.5/5',
                                     '5/2.5', '5/4', '4.25/5'])
df_length = len(df)
# Share of flagged rows, in percent. The guard makes an empty dataset report 0%
# instead of dividing by zero; Series.sum() avoids needing numpy in scope.
string_star_pct = string_star_mask.sum() * 100 / df_length if df_length else 0.0
# The message is one logical string (an f-string literal cannot contain a raw line
# break). `.2f` prints two decimals; a bare `.2` means two significant digits and
# switches to exponent notation for values >= 10 (e.g. 12.3 -> '1.2e+01').
print("Percentage of rows with `Unrated`, `NR` or mixing rates in the dataset is "
      f"{string_star_pct:.2f}%.")
# Remove string ratings from the dataset
df = df[~string_star_mask]