Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Build a (listing id, rating) lookup from the listings table.
# Both columns are cast to integers and rows where either value is null
# after the cast are dropped.
# NOTE(review): in Spark's default (non-ANSI) mode an uncastable value
# becomes null, so these filters also discard malformed rows -- confirm
# the session is not running with ANSI mode enabled.
listings_score_only_df = listings_df.select(['id', 'review_scores_rating']).cache()
listings_score_only_df = listings_score_only_df.withColumn(
    'id', listings_score_only_df['id'].cast(IntegerType()))
listings_score_only_df = listings_score_only_df.filter(col('id').isNotNull())
listings_score_only_df = listings_score_only_df.withColumn(
    'review_scores_rating',
    listings_score_only_df['review_scores_rating'].cast(IntegerType()))
listings_score_only_df = listings_score_only_df.filter(
    col('review_scores_rating').isNotNull())

# Keep only the listing id and the review text from the reviews table.
reviews_with_score_df = reviews_df.select(['listing_id', 'comments']).cache()
reviews_with_score_df = reviews_with_score_df.withColumn(
    'listing_id', reviews_with_score_df['listing_id'].cast(IntegerType()))

# Remove any rows with null values
reviews_with_score_df = reviews_with_score_df.filter(col('listing_id').isNotNull())
reviews_with_score_df = reviews_with_score_df.filter(col('comments').isNotNull())

# Join every review with the rating of the listing it belongs to; the
# listing's rating is exposed as 'mean_score'.
# cache() must be *called*: without the parentheses the name would be bound
# to the method object instead of a DataFrame and show() below would fail.
reviews_with_score_df = reviews_with_score_df.join(
    listings_score_only_df,
    reviews_with_score_df.listing_id == listings_score_only_df.id,
).select(
    reviews_with_score_df['*'],
    listings_score_only_df['review_scores_rating'].alias('mean_score'),
).cache()

# View some of the joined reviews. show() prints the table itself and
# returns None, so it is not wrapped in print().
reviews_with_score_df.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment