Skip to content

Instantly share code, notes, and snippets.

@YannisDC
Created March 24, 2023 19:45
Show Gist options
  • Save YannisDC/eae92eeb64c061532f863d5b26f6f2a6 to your computer and use it in GitHub Desktop.
Save YannisDC/eae92eeb64c061532f863d5b26f6f2a6 to your computer and use it in GitHub Desktop.
Fetches reviews from Trustpilot and plots their ratings over time
import requests
from bs4 import BeautifulSoup
import csv
def fetch_reviews(company_name):
    """Scrape the Trustpilot reviews of a company and save them to a CSV file.

    Starts at ``https://www.trustpilot.com/review/{company_name}`` and keeps
    following the "next page" link until there is none left. For every review
    the title, content, rating and date are collected; a field that cannot be
    found in the page is stored as the string ``"NaN"``. The result is written
    to ``{company_name}_reviews.csv`` and a summary line is printed.

    Args:
        company_name: Company identifier as it appears in the Trustpilot
            review URL (e.g. ``"www.chronopost.fr"``).

    Raises:
        requests.HTTPError: If a page answers with an HTTP error status.
        requests.Timeout: If a page does not answer within the timeout.
    """
    base_url = "https://www.trustpilot.com"
    url = f"{base_url}/review/{company_name}"
    reviews_list = []
    visited = set()  # guards against a pagination link that loops back

    while url and url not in visited:
        visited.add(url)

        # A timeout keeps a stalled connection from hanging forever, and
        # raise_for_status stops an error page from being parsed as if it
        # were an (empty) page of reviews.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # NOTE(review): these CSS class names look build-generated and will
        # presumably change when Trustpilot redeploys — confirm periodically.
        for review in soup.find_all("section", class_="styles_reviewContentwrapper__zH_9M"):
            title_tag = review.find("h2", class_="typography_heading-s__f7029")
            content_tag = review.find("p", class_="typography_body-l__KUYFJ")
            rating_tag = review.find("img")
            date_tag = review.find("time")

            # The rating is the second word of the image alt text.
            rating_words = rating_tag.get("alt", "").split() if rating_tag is not None else []
            # Keep only the date part of the ISO timestamp.
            timestamp = date_tag.get("datetime", "") if date_tag is not None else ""

            reviews_list.append({
                "title": title_tag.text.strip() if title_tag is not None else "NaN",
                "content": content_tag.text.strip() if content_tag is not None else "NaN",
                "rating": rating_words[1] if len(rating_words) > 1 else "NaN",
                "date": timestamp.split("T")[0] or "NaN",
            })

        # Follow the "next page" link; stop when it is absent or has no href.
        next_page = soup.find("a", {"data-page-number": "next-page"})
        href = next_page.get("href") if next_page is not None else None
        url = base_url + href if href else None

    # Save reviews to CSV
    fieldnames = ["title", "content", "rating", "date"]
    with open(f"{company_name}_reviews.csv", "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(reviews_list)

    print(f"Successfully saved {len(reviews_list)} reviews for {company_name} to {company_name}_reviews.csv")
# Example usage: scrape the reviews of one company. This runs as soon as the
# script is executed; the company name is reused below to locate the CSV file.
company_name = "www.chronopost.fr"
fetch_reviews(company_name)
import pandas as pd
import matplotlib.pyplot as plt
def plot_reviews(csv_file):
    """Show a scatter of review ratings against review dates.

    Args:
        csv_file: Path to a CSV file with at least a ``date`` column and a
            ``rating`` column.
    """
    # Load the reviews, letting pandas parse the date column.
    frame = pd.read_csv(csv_file, parse_dates=["date"])

    # Make sure ratings are numeric, then order the rows chronologically.
    frame["rating"] = pd.to_numeric(frame["rating"])
    frame = frame.sort_values("date")

    # One marker per review, no connecting line.
    dates = frame["date"]
    ratings = frame["rating"]
    plt.plot(dates, ratings, marker="o", linestyle="")

    # Label the axes and the figure.
    plt.xlabel("Date")
    plt.ylabel("Rating")
    plt.title("Reviews Ratings Over Time")

    # Open the plot window.
    plt.show()
# Plot the ratings stored in the CSV file named after the company above
# (the same file name pattern that fetch_reviews writes to).
csv_file = f"{company_name}_reviews.csv"
plot_reviews(csv_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment