Skip to content

Instantly share code, notes, and snippets.

@ryandhubbard
Created October 27, 2023 03:04
Show Gist options
  • Save ryandhubbard/c8b38c71aad364c0bca13fe6dc29fea6 to your computer and use it in GitHub Desktop.
Save ryandhubbard/c8b38c71aad364c0bca13fe6dc29fea6 to your computer and use it in GitHub Desktop.
This script automatically scrapes app reviews from the iOS and Android app stores. It emails you the results and highlights any changes within the last 7 days to the top reviews displayed in each app store.
# %%
import requests
from bs4 import BeautifulSoup
import pandas as pd
from IPython.display import HTML
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
from email.mime.text import MIMEText
import smtplib
from smtplib import SMTPException
import xlwt
from xlwt import Workbook
from datetime import datetime, timedelta
def find_between(s, first, last):
    """Return the text of *s* between the first *first* and the next *last*.

    The search for *last* begins right after the first occurrence of
    *first*.  An empty string is returned when either marker is missing.
    """
    begin = s.find(first)
    if begin == -1:
        return ""
    begin += len(first)
    stop = s.find(last, begin)
    if stop == -1:
        return ""
    return s[begin:stop]
def find_between_r(s, first, last):
    """Return the text of *s* between the last *first* and the last *last*.

    *last* is searched for from the right, but only in the part of *s* that
    follows the final occurrence of *first*.  An empty string is returned
    when either marker is missing.
    """
    begin = s.rfind(first)
    if begin == -1:
        return ""
    begin += len(first)
    stop = s.rfind(last, begin)
    if stop == -1:
        return ""
    return s[begin:stop]
def google_reviews(url):
    """Scrape the reviews embedded in a Google Play app page.

    Args:
        url: Address of the app's Google Play details page.

    Returns:
        A tuple ``(reviews, current_review_rating, url)`` where ``reviews``
        is a list with one dict per review block found on the page,
        ``current_review_rating`` is the text of the page-level rating
        element, and ``url`` is the address that was requested.  Any field
        whose markup is not found is an empty string.
    """
    # Without a timeout an unresponsive server would hang the whole report.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Page-level rating; repeated on every review row as 'agg_score'.
    current_review_rating = find_between(str(soup), '<div class="jILTFe">', '</div>')

    reviews = []
    # NOTE(review): these class names look auto-generated by the site and
    # may change without notice - confirm they still match the live markup.
    for review_div in soup.find_all("div", {"class": "EGFGHd"}):
        text = str(review_div)
        reviews.append({
            'app': url,
            'name': find_between(text, '<div class="X5PpBb">', '</div>'),
            'date': find_between(text, '<span class="bp9Aid">', '</span>'),
            'message': find_between(text, '<div class="h3YV2d">', '</div>'),
            'helpful_count': find_between(text, '<div class="AJTPZc" jsname="J0d7Yd">', '</div>'),
            'score': find_between(text, 'aria-label="Rated', ' stars out of five stars'),
            'agg_score': current_review_rating,
        })
    return reviews, current_review_rating, url
def apple_reviews(url):
    """Scrape the reviews embedded in an Apple App Store app page.

    Args:
        url: Address of the app's App Store page, without a query string;
            ``?see-all=reviews`` is appended before the request.

    Returns:
        A tuple ``(reviews, current_review_rating, url)`` where ``reviews``
        is a list with one dict per review block found on the page,
        ``current_review_rating`` is the text of the page-level rating
        element, and ``url`` is the address actually requested (including
        the appended query string).  Any field whose markup is not found is
        an empty string.
    """
    url = url + '?see-all=reviews'
    # Without a timeout an unresponsive server would hang the whole report.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Page-level rating; repeated on every review row as 'agg_score'.
    current_review_rating = find_between(
        str(soup),
        '<span class="we-customer-ratings__averages__display">',
        '</span>',
    )

    reviews = []
    for review_div in soup.find_all("div", {"class": "we-customer-review"}):
        text = str(review_div)

        # A review block without a title element must not abort the scrape:
        # find() returns None in that case, so fall back to an empty title.
        title_tag = review_div.find("h3", {"class": "we-customer-review__title"})
        title_text = title_tag.text if title_tag is not None else ""
        subjects = find_between(title_text, '\n ', '\n')

        # The developer reply, when present, is wrapped in its own paragraph
        # inside the "Developer Response" blockquote.
        developer_response = find_between(text, 'Developer Response', '<!-- --></blockquote>')
        clean_developer_response = find_between(
            developer_response, '<p data-test-bidi="" dir="false">', '</p>'
        )

        reviews.append({
            'app': url,
            'name': find_between(
                text,
                '<span class="we-truncate we-truncate--single-line we-customer-review__user" dir="ltr">\n ',
                '\n</span>',
            ),
            'date': find_between(text, 'datetime="', '">'),
            'message': find_between(text, '<p data-test-bidi="" dir="false">', '</p>'),
            'developer_response': clean_developer_response,
            'subject': subjects,
            'score': find_between(
                text,
                '<span class="we-star-rating-stars we-star-rating-stars-',
                '"></span>',
            ),
            'store': 'ios',
            'agg_score': current_review_rating,
        })
    return reviews, current_review_rating, url
# App Store pages to scrape; each entry is passed to apple_reviews().
ios = [
'https://apps.apple.com/us/app/temu-shop-like-a-billionaire/id1641486558',
'https://apps.apple.com/us/app/chatgpt/id6448311069',
'https://apps.apple.com/us/app/google/id284815942'
]
# Google Play pages to scrape; each entry is passed to google_reviews().
android = [
'https://play.google.com/store/apps/details?id=com.zhiliaoapp.musically&hl=en_US&gl=US',
'https://play.google.com/store/apps/details?id=com.google.android.apps.subscriptions.red&hl=en_US&gl=US',
'https://play.google.com/store/apps/details?id=com.espn.score_center&hl=en_US&gl=US'
]
# Accumulators filled by get_ios_reviews() / get_android_reviews(): one
# (reviews, current_review_rating, url) tuple per scraped app.
ios_reviews = []
android_review = []
def get_ios_reviews():
    """Scrape every URL in ``ios`` and append each result to ``ios_reviews``.

    Blank entries in the URL list are skipped.
    """
    for app_url in ios:
        if not app_url:
            continue
        ios_reviews.append(apple_reviews(app_url))
def get_android_reviews():
    """Scrape every URL in ``android`` and append each result to ``android_review``.

    Blank entries in the URL list are skipped.
    """
    for app_url in android:
        if not app_url:
            continue
        android_review.append(google_reviews(app_url))
# Run both scrapers now; the calls fill the module-level ios_reviews and
# android_review lists that the following cells read.
get_ios_reviews()
get_android_reviews()
# %%
# Flatten the per-app results into one row per review.  Element [0] of each
# scraper result is that app's list of review dicts.
# df2 holds the Android reviews, df3 the iOS reviews.
df2 = pd.DataFrame(
    [review for app_result in android_review for review in app_result[0]]
)
df3 = pd.DataFrame(
    [review for app_result in ios_reviews for review in app_result[0]]
)
# %%
# Index by app / aggregate score / reviewer name so the exported sheets
# group rows under those columns (the index is written because to_excel is
# called without index=False).
df5 = df2.copy().set_index(['app', 'agg_score', 'name'])  # Android
df6 = df3.copy().set_index(['app', 'agg_score', 'name'])  # iOS

# The context manager saves and closes the workbook even if a sheet fails
# to write, so the file handle is never left open.
with pd.ExcelWriter('Top_App_Reviews.xlsx', engine='xlsxwriter') as writer:
    df5.to_excel(writer, sheet_name='Android')
    df6.to_excel(writer, sheet_name='iOS')
# %%
# Keep only the reviews dated within the last 7 days.
# NOTE(review): datetime.today() is naive local time, while the App Store
# timestamps end in "Z" (presumably UTC) - confirm whether the offset matters.
cutoff = datetime.today() - timedelta(days=7)

# df6 holds the iOS reviews (written to the 'iOS' sheet); their dates are
# ISO timestamps taken from the datetime="..." attribute.
recent_ios = df6[
    pd.to_datetime(df6['date'], format='%Y-%m-%dT%H:%M:%S.%fZ') > cutoff
]
# df5 holds the Android reviews (written to the 'Android' sheet); their
# dates are free-form text, so pandas is left to infer the format.
recent_Android = df5[pd.to_datetime(df5['date']) > cutoff]
# %%
# Build the HTML body of the report: an iOS section followed by an Android
# section, each showing either a table of recent reviews or a placeholder.
result = recent_ios.to_html()
result2 = recent_Android.to_html()

message = """<span STYLE="font-weight:bold">Recent iOS Reviews: </span></br>"""
if recent_ios.empty:
    message += 'No New Top iOS Reviews in last 7 Days'
else:
    message += result

# Use the same bold heading whether or not the iOS table was empty
# (previously the heading lost its styling when iOS reviews were present).
message += ' </br> </br> <span STYLE="font-weight:bold">Recent Android Reviews:</span></br>'
if recent_Android.empty:
    message += 'No Recent Top Android Reviews in last 7 Days'
else:
    message += result2

message += """
</br> </br></br> </br></br> </br>This is an automated report.
"""
# Email the HTML report with the Excel workbook attached.
try:
    msg = MIMEMultipart()
    body_part = MIMEText(message, 'html')
    msg['Subject'] = 'Top App Reviews'
    msg['From'] = 'reviews@example.com'
    msg['To'] = 'example@example.com;'
    # Add body to email
    msg.attach(body_part)

    # Open and read the workbook in binary, then attach it under its filename.
    with open('Top_App_Reviews.xlsx', 'rb') as file:
        attachment = MIMEApplication(file.read(), Name='Top_App_Reviews.xlsx')
    # Mark the part as an attachment so mail clients offer it for download
    # under its filename.
    attachment.add_header(
        'Content-Disposition', 'attachment', filename='Top_App_Reviews.xlsx'
    )
    msg.attach(attachment)

    # sendmail() treats a plain string as ONE address, so split the
    # semicolon-separated header into the individual recipients.
    recipients = [addr.strip() for addr in msg['To'].split(';') if addr.strip()]

    # Create the SMTP connection; the context manager sends QUIT and closes
    # the socket even if sending fails.
    with smtplib.SMTP('smtp.domain.com') as smtp_obj:
        # Convert the message to a string and send it
        smtp_obj.sendmail(msg['From'], recipients, msg.as_string())
except SMTPException as e:
    # Only SMTPResponseException subclasses carry smtp_code / smtp_error;
    # fall back to the exception itself so the handler never raises.
    error_code = getattr(e, 'smtp_code', None)
    error_message = getattr(e, 'smtp_error', e)
    print(error_message)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment