Skip to content

Instantly share code, notes, and snippets.

@afranzi
Created May 30, 2019 16:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afranzi/294f42ccd95baff0e7f9266d73ade484 to your computer and use it in GitHub Desktop.
Save afranzi/294f42ccd95baff0e7f9266d73ade484 to your computer and use it in GitHub Desktop.
# Install quinn>=0.3.1
from quinn.extensions.dataframe_ext import DataFrame
def with_idx(id_col: str, output_col: str) -> Callable[[DataFrame], DataFrame]:
    """Build a transformation that adds a row-number index for each distinct id.

    The returned callable is meant to be passed to ``DataFrame.transform``.

    Args:
        id_col: Name of the column holding the identifiers to index.
        output_col: Name of the column that will receive the index.

    Returns:
        A function taking a DataFrame and returning that DataFrame joined
        with a lookup of ``id_col`` -> ``output_col``.
    """

    def _attach_index(df: DataFrame) -> DataFrame:
        # The window has no partitioning clause: row numbers are assigned
        # across all distinct ids, ordered by the id value itself.
        ordered_by_id = Window.orderBy(id_col)

        # One row per distinct id, so every occurrence of an id in `df`
        # ends up with the same index after the join below.
        distinct_ids = df.select(id_col).distinct()
        id_lookup = distinct_ids.withColumn(
            output_col, F.row_number().over(ordered_by_id)
        )

        return df.join(id_lookup, on=[id_col])

    return _attach_index
def compute_int_users_and_activities(df: DataFrame) -> DataFrame:
    """Add integer index columns for activities and users.

    Applies ``with_idx`` twice: first ``activityId`` -> ``activityIdx``,
    then ``userId`` -> ``userIdx``.

    Args:
        df: DataFrame containing ``activityId`` and ``userId`` columns.

    Returns:
        The DataFrame with ``activityIdx`` and ``userIdx`` columns added.
    """
    add_activity_idx = with_idx('activityId', 'activityIdx')
    add_user_idx = with_idx('userId', 'userIdx')
    return df.transform(add_activity_idx).transform(add_user_idx)
def compute_ratings_matrix(user_events_df: DataFrame, rec_features_df: DataFrame, factors: List[dict]) -> DataFrame:
    """Run the ratings pipeline over ``rec_features_df``.

    Three transformations are applied in order:

    1. ``compute_rating_factors(user_events_df, factors)``
    2. ``compute_relevance_scores``
    3. ``compute_int_users_and_activities``

    Args:
        user_events_df: Passed through to ``compute_rating_factors``.
        rec_features_df: The DataFrame the pipeline is applied to.
        factors: Passed through to ``compute_rating_factors``.

    Returns:
        The DataFrame produced by the last transformation.
    """
    # NOTE(review): compute_rating_factors and compute_relevance_scores are
    # defined outside this snippet; the first is used here as a factory
    # returning a DataFrame -> DataFrame callable.
    add_rating_factors = compute_rating_factors(user_events_df, factors)

    with_factors = rec_features_df.transform(add_rating_factors)
    with_scores = with_factors.transform(compute_relevance_scores)
    return with_scores.transform(compute_int_users_and_activities)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment