Ha Dinh hadinh1306

## find_reviewers.py
from collections import defaultdict
import pandas as pd

# Minimum "collaboration_days" value (compared with >=) that _rank_collaborators
# keeps when keep_meaningful_collaborations is true.
MEANINGFUL_COLLABORATION_DAYS = 15

# NOTE(review): not referenced in the visible code; presumably the limits used
# for the first assignment pass in _assign_peer_reviewers — confirm.
INITIAL_SLOTS = 3
INITIAL_WORKLOAD = 3

# Totals that _calculate_available_slots subtracts per-person counts from:
# REVIEWER_WORKLOAD minus the rows counted per "collaborator_employee", and
# REVIEWER_SLOTS minus the rows counted per "employee".
REVIEWER_WORKLOAD = 6
REVIEWER_SLOTS = 6

## assign_additionals.py
# Fill reviewer slots that are still open with further team-member reviewers.

# An (employee, collaborator_employee) pair identifies one reviewer assignment.
reviewer_pair_keys = list(team_df[["employee", "collaborator_employee"]].columns)
team_index = team_df.set_index(reviewer_pair_keys).index
team_member_reviewers_index = team_member_reviewers_df.set_index(
    reviewer_pair_keys
).index

# Keep only the team pairs that have not been assigned as reviewers yet.
additional_team_members_df = team_df.loc[
    ~team_index.isin(team_member_reviewers_index)
]

# Everything assigned so far (team-member plus cross-team reviewers) is the
# input for the slot calculation on the remaining candidates.
intermediate_peer_reviewers = pd.concat(
    [team_member_reviewers_df, cross_team_reviewers_df]
)
additional_team_members_df = _calculate_available_slots(
    intermediate_peer_reviewers,
    additional_team_members_df,
)

## calculate_available_slots.py
# Totals from which the per-person counts computed below are subtracted.
REVIEWER_WORKLOAD = 6
REVIEWER_SLOTS = 6

def _calculate_available_slots(df, next_step_df):
    """Compute remaining reviewer slots and remaining review workload from ``df``.

    Args:
        df: Table with "employee" and "collaborator_employee" columns, both of
            which are grouped on and counted below (presumably a pandas
            DataFrame of reviewer pairs assigned so far — confirm with caller).
        next_step_df: Not used in the portion of the function visible here.

    NOTE(review): the definition appears to be cut off after the workload
    calculation. No return statement is visible, so the return value and the
    role of ``next_step_df`` cannot be documented from this excerpt.
    """
    # Per employee: number of non-null "collaborator_employee" entries.
    available_slots_df = df.groupby("employee", as_index=False)["collaborator_employee"].count()
    # Remaining slots = REVIEWER_SLOTS minus that count.
    available_slots_df.loc[:, "available_slots"] = REVIEWER_SLOTS - available_slots_df.loc[:, "collaborator_employee"]
    # Drop the raw count, leaving "employee" and "available_slots".
    available_slots_df = available_slots_df.drop(columns="collaborator_employee")

    # Per collaborator_employee: number of non-null "employee" entries.
    available_workload_df = df.groupby("collaborator_employee", as_index=False)["employee"].count()
    # Remaining workload = REVIEWER_WORKLOAD minus that count.
    available_workload_df.loc[:, "available_workload"] = REVIEWER_WORKLOAD - available_workload_df.loc[:, "employee"]

## assign_peers.py
from collections import defaultdict

# NOTE(review): neither constant is referenced in the visible part of this
# snippet; presumably they cap the first assignment pass — confirm.
INITIAL_SLOTS = 3
INITIAL_WORKLOAD = 3

def _assign_peer_reviewers(df):
    """Set up the bookkeeping used to assign peer reviewers from ``df``.

    Args:
        df: Object providing ``.copy()`` (presumably a pandas DataFrame of
            collaborator pairs — confirm with caller).

    NOTE(review): only the setup is visible; the assignment logic and the
    return value are cut off in this excerpt.
    """
    # Both trackers hand back a fresh empty list for any key not seen before.
    reviewer_slots = defaultdict(list)
    reviewer_workload = defaultdict(list)

    # Later steps operate on a copy (presumably so the caller's data stays untouched).
    df_copy = df.copy()

## rank_collaborator.py
import pandas as pd

# Threshold for the "collaboration_days" filter below.
MEANINGFUL_COLLABORATION_DAYS = 15

def _rank_collaborators(df, keep_meaningful_collaborations=True):
    """Copy ``df`` and optionally keep only meaningful collaborations.

    Args:
        df: Table with a "collaboration_days" column, read when filtering
            (presumably a pandas DataFrame — confirm with caller).
        keep_meaningful_collaborations: When true, only rows satisfying
            ``collaboration_days >= MEANINGFUL_COLLABORATION_DAYS`` are kept
            in the working copy.

    NOTE(review): the excerpt ends after the filter; the ranking step and the
    return value are not visible here.
    """
    # All work happens on a copy; ``df`` itself is never reassigned here.
    df_copy = df.copy()

    if keep_meaningful_collaborations:
        # Boolean mask: True where the collaboration reaches the threshold.
        meaningful_collaborations_mask = df_copy["collaboration_days"] >= MEANINGFUL_COLLABORATION_DAYS
        df_copy = df_copy.loc[meaningful_collaborations_mask]

## peer_review_fake_data.py
import pandas as pd

# Sample collaboration records: six collaborators for 'Ha', five for 'Mai'.
data = {
    'employee': ['Ha'] * 6 + ['Mai'] * 5,
    'collaborator_employee': [
        'Minh', 'Mai', 'Lam', 'Nguyen', 'Chau', 'Giang',  # Ha's collaborators
        'Minh', 'Ha', 'Lam', 'Nguyen', 'Chau',            # Mai's collaborators
    ],
    'collaboration_days': [
        30, 25, 10, 60, 50, 5,
        60, 25, 12, 15, 1,
    ],
}

df = pd.DataFrame(data=data)

## spotify_playlist_extract.py
import logging
import os

import spotipy
from spotipy.oauth2 import SpotifyOAuth

import pandas as pd

from utils import CLIENT_ID
from utils import CLIENT_SECRET

## geometric_progression.py
from collections.abc import Sequence

# NOTE(review): this definition is cut off inside the ``try`` block; the
# handler and everything after the first ratio are not visible in this excerpt.
def identify_geometric_progression(sequence):
    """
    Determine if a sequence is a geometric progression.
    """
    # Both operands are plain bools, so ``&`` behaves like ``and`` here, only
    # without short-circuiting.
    # NOTE(review): assert statements are skipped when Python runs with -O, so
    # these two checks are not reliable input validation.
    assert isinstance(sequence, Sequence) & (not isinstance(sequence, str)), "Expect input to be a sequence that's not string"
    assert len(sequence) > 2, "Expect a sequence with more than 2 items"
    try:
        # Ratio of the second item to the first.
        ratio = sequence[1]/sequence[0]

## binning.py
# Bin edges 0 through 5, giving five one-star-wide intervals with one label each.
rating_bins = list(range(6))
rating_bin_labels = ['0-1', '1-2', '2-3', '3-4', '4-5']

# Right-closed intervals; include_lowest also closes the first one on the
# left so that a rating of exactly 0 lands in '0-1'.
df['RatingGroups'] = pd.cut(
    df['Stars'],
    bins=rating_bins,
    right=True,
    labels=rating_bin_labels,
    include_lowest=True,
)

## worst_ramen_string_rating.py
# Flag ratings that are not plain numbers: unrated markers and fraction-style
# scores such as '4.5/5'.
string_star_mask = df['Stars'].isin(['Unrated', 'NR', '1/4', '1/2', '1/3',
                                     '3.5/2.5', '4/4', '5/5', '4.5/5',
                                     '5/2.5', '5/4', '4.25/5'])
df_length = len(df)

# The message is built from two adjacent literals: a single-quoted f-string
# cannot contain a raw line break, which made the original statement a
# SyntaxError. The mask's own .sum() counts the True entries, so this no
# longer relies on numpy being imported as ``np``. The ".2f" spec prints two
# decimal places; the previous ".2" meant two significant digits and switches
# to exponent notation for values of 10 or more.
print("Percentage of rows with `Unrated`, `NR` or mixing rates in the dataset is "
      f"{string_star_mask.sum() * 100 / df_length:.2f}%.")

# Remove string ratings from the dataset
df = df[~string_star_mask]
	from collections import defaultdict
	import pandas as pd

	# Minimum "collaboration_days" value (compared with >=) that _rank_collaborators
	# keeps when keep_meaningful_collaborations is true.
	MEANINGFUL_COLLABORATION_DAYS = 15

	# NOTE(review): not referenced in the visible code; presumably the limits used
	# for the first assignment pass — confirm.
	INITIAL_SLOTS = 3
	INITIAL_WORKLOAD = 3

	# Totals that _calculate_available_slots subtracts per-person counts from.
	REVIEWER_WORKLOAD = 6
	REVIEWER_SLOTS = 6
	# Fill reviewer slots that are still open with further team-member reviewers.

	# An (employee, collaborator_employee) pair identifies one reviewer assignment.
	reviewer_pair_keys = list(team_df[["employee", "collaborator_employee"]].columns)
	team_index = team_df.set_index(reviewer_pair_keys).index
	team_member_reviewers_index = team_member_reviewers_df.set_index(
	    reviewer_pair_keys
	).index

	# Keep only the team pairs that have not been assigned as reviewers yet.
	additional_team_members_df = team_df.loc[
	    ~team_index.isin(team_member_reviewers_index)
	]

	# Everything assigned so far (team-member plus cross-team reviewers) is the
	# input for the slot calculation on the remaining candidates.
	intermediate_peer_reviewers = pd.concat(
	    [team_member_reviewers_df, cross_team_reviewers_df]
	)
	additional_team_members_df = _calculate_available_slots(
	    intermediate_peer_reviewers,
	    additional_team_members_df,
	)
	# Totals from which the per-person counts computed below are subtracted.
	REVIEWER_WORKLOAD = 6
	REVIEWER_SLOTS = 6

	# NOTE(review): the statements after the def line sit at the same
	# indentation as the def itself, so this copy does not parse as written;
	# the body indentation appears to have been lost. The definition also seems
	# to be cut off: no return is visible and next_step_df is unused in view.
	def _calculate_available_slots(df, next_step_df):
	# Per employee: number of non-null "collaborator_employee" entries.
	available_slots_df = df.groupby("employee", as_index=False)["collaborator_employee"].count()
	# Remaining slots = REVIEWER_SLOTS minus that count.
	available_slots_df.loc[:, "available_slots"] = REVIEWER_SLOTS - available_slots_df.loc[:, "collaborator_employee"]
	# Drop the raw count, leaving "employee" and "available_slots".
	available_slots_df = available_slots_df.drop(columns="collaborator_employee")

	# Per collaborator_employee: number of non-null "employee" entries.
	available_workload_df = df.groupby("collaborator_employee", as_index=False)["employee"].count()
	# Remaining workload = REVIEWER_WORKLOAD minus that count.
	available_workload_df.loc[:, "available_workload"] = REVIEWER_WORKLOAD - available_workload_df.loc[:, "employee"]
	import pandas as pd

	# Threshold for the "collaboration_days" filter below.
	MEANINGFUL_COLLABORATION_DAYS = 15

	# NOTE(review): the body lines below are not indented under the def or the
	# if, so this copy does not parse as written. The excerpt also ends after
	# the filter; the ranking step and return value are not visible.
	def _rank_collaborators(df, keep_meaningful_collaborations=True):
	# All work happens on a copy; ``df`` itself is never reassigned here.
	df_copy = df.copy()

	if keep_meaningful_collaborations:
	# Boolean mask: True where "collaboration_days" reaches the threshold.
	meaningful_collaborations_mask = df_copy["collaboration_days"] >= MEANINGFUL_COLLABORATION_DAYS
	df_copy = df_copy.loc[meaningful_collaborations_mask]
	import pandas as pd

	# Sample collaboration records: six collaborators for 'Ha', five for 'Mai'.
	data = {
	    'employee': ['Ha'] * 6 + ['Mai'] * 5,
	    'collaborator_employee': [
	        'Minh', 'Mai', 'Lam', 'Nguyen', 'Chau', 'Giang',  # Ha's collaborators
	        'Minh', 'Ha', 'Lam', 'Nguyen', 'Chau',            # Mai's collaborators
	    ],
	    'collaboration_days': [
	        30, 25, 10, 60, 50, 5,
	        60, 25, 12, 15, 1,
	    ],
	}

	df = pd.DataFrame(data=data)
	import logging
	import os

	import spotipy
	from spotipy.oauth2 import SpotifyOAuth

	import pandas as pd

	from utils import CLIENT_ID
	from utils import CLIENT_SECRET
	from collections.abc import Sequence

	# NOTE(review): the body below is not indented under the def, so this copy
	# does not parse as written, and the definition is cut off inside the
	# ``try`` block (no handler is visible).
	def identify_geometric_progression(sequence):
	"""
	Determine if a sequence is a geometric progression.
	"""
	# Both operands are plain bools, so ``&`` behaves like ``and`` here, only
	# without short-circuiting.
	# NOTE(review): assert statements are skipped when Python runs with -O.
	assert isinstance(sequence, Sequence) & (not isinstance(sequence, str)), "Expect input to be a sequence that's not string"
	assert len(sequence) > 2, "Expect a sequence with more than 2 items"
	try:
	# Ratio of the second item to the first.
	ratio = sequence[1]/sequence[0]
	# Bin edges 0 through 5, giving five one-star-wide intervals with one label each.
	rating_bins = list(range(6))
	rating_bin_labels = ['0-1', '1-2', '2-3', '3-4', '4-5']

	# Right-closed intervals; include_lowest also closes the first one on the
	# left so that a rating of exactly 0 lands in '0-1'.
	df['RatingGroups'] = pd.cut(
	    df['Stars'],
	    bins=rating_bins,
	    right=True,
	    labels=rating_bin_labels,
	    include_lowest=True,
	)
	# Flag ratings that are not plain numbers: unrated markers and fraction-style
	# scores such as '4.5/5'.
	string_star_mask = df['Stars'].isin(['Unrated', 'NR', '1/4', '1/2', '1/3',
	                                     '3.5/2.5', '4/4', '5/5', '4.5/5',
	                                     '5/2.5', '5/4', '4.25/5'])
	df_length = len(df)

	# The message is built from two adjacent literals: a single-quoted f-string
	# cannot contain a raw line break, which made the original statement a
	# SyntaxError. The mask's own .sum() counts the True entries, so this no
	# longer relies on numpy being imported as ``np``. The ".2f" spec prints two
	# decimal places; the previous ".2" meant two significant digits and switches
	# to exponent notation for values of 10 or more.
	print("Percentage of rows with `Unrated`, `NR` or mixing rates in the dataset is "
	      f"{string_star_mask.sum() * 100 / df_length:.2f}%.")

	# Remove string ratings from the dataset
	df = df[~string_star_mask]