# Build the per-listing score lookup used by the join further down.
# Only the listing id and its review score are kept; both are cast to
# integers and any row whose id or score is null after the cast is dropped.
# NOTE(review): the integer cast presumably relies on Spark turning
# unparseable values into null (non-ANSI mode) and will truncate fractional
# scores - confirm against the dataset and Spark configuration.
listings_score_only_df = (
    listings_df.select(['id', 'review_scores_rating'])
    .cache()
    .withColumn('id', col('id').cast(IntegerType()))
    .filter(col('id').isNotNull())
    .withColumn('review_scores_rating',
                col('review_scores_rating').cast(IntegerType()))
    .filter(col('review_scores_rating').isNotNull())
)
# Keep only the listing id and the review text, casting the id to an integer
# so it is comparable with the (integer) listing id on the other side of the
# join.
reviews_with_score_df = (
    reviews_df.select(['listing_id', 'comments'])
    .cache()
    .withColumn('listing_id', col('listing_id').cast(IntegerType()))
    # Remove any rows with null values
    .filter(col('listing_id').isNotNull())
    .filter(col('comments').isNotNull())
)

# Attach each listing's review score to every one of its reviews as
# 'mean_score'. The default join type is used, so reviews without a matching
# listing row are dropped.
reviews_with_score_df = reviews_with_score_df.join(
    listings_score_only_df,
    reviews_with_score_df.listing_id == listings_score_only_df.id,
).select(
    reviews_with_score_df['*'],
    listings_score_only_df['review_scores_rating'].alias('mean_score'),
).cache()  # cache() must be *called*; a bare `.cache` binds the method object

# View the first rows of the joined reviews. show() prints the table itself
# and returns None, so it is not wrapped in print().
reviews_with_score_df.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment