Skip to content

Instantly share code, notes, and snippets.

@htlin222
Last active January 26, 2023 15:48
Show Gist options
  • Save htlin222/e3f404159dfadd29e47a3956c1a6b5f9 to your computer and use it in GitHub Desktop.
Save htlin222/e3f404159dfadd29e47a3956c1a6b5f9 to your computer and use it in GitHub Desktop.
使用 Python 的 google_play_scraper 與 app_store_scraper 兩個套件爬取 App 的評論,並分別存成兩份 Excel 檔
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# title: google and apple app review
# date: "2023-01-26"
from google_play_scraper import Sort, reviews_all
from app_store_scraper import AppStore
import pandas as pd
from urllib.parse import quote
import numpy as np
def apple_app_scraper(
    app_name='台北捷運go',
    app_id='997212021',
    country='tw',
    how_many=2000,
    output_path='apple_result.xlsx',
):
    """Fetch App Store reviews for one app and save them to an Excel file.

    The defaults target the Taipei Metro Go app. All three identifying
    values can be read off the app's store page URL, which has the form
    ``https://apps.apple.com/<country>/app/<app-name>/id<app_id>``
    (for the defaults: country ``tw`` and id ``997212021``).

    Args:
        app_name: App name as it appears in the store URL. It is
            percent-encoded with ``urllib.parse.quote`` before being
            handed to ``AppStore``.
        app_id: The digits that follow ``id`` in the store URL.
        country: Storefront country code from the store URL.
        how_many: Forwarded unchanged to ``AppStore.review(how_many=...)``.
        output_path: Path of the ``.xlsx`` file to write.
    """
    encoded_name = quote(app_name)
    store = AppStore(country=country, app_name=encoded_name, app_id=app_id)
    store.review(how_many=how_many)

    # Build the table straight from the fetched review records; each record
    # is expanded into its own columns (presumably one dict per review --
    # confirm against the app_store_scraper documentation).
    reviews_df = pd.DataFrame(store.reviews)

    # Save DataFrame to Excel file, without the positional index column.
    reviews_df.to_excel(output_path, index=False)
    print('Apple store review scrap done')
def google_app_scraper(
    app_id='tw.com.trtc.is.android05',
    lang='zh_TW',
    country='us',
    output_path='google_result.xlsx',
):
    """Fetch Google Play reviews for one app and save them to an Excel file.

    The defaults target the app whose store page is
    ``https://play.google.com/store/apps/details?id=tw.com.trtc.is.android05&hl=zh_TW&gl=US``.
    The package id is the ``id`` query parameter and the language code is
    the ``hl`` query parameter of that URL.

    Args:
        app_id: Package id of the app (the ``id=`` value in the store URL).
        lang: Review language code (the ``hl=`` value in the store URL).
        country: Country code forwarded to ``reviews_all``.
        output_path: Path of the ``.xlsx`` file to write.
    """
    fetched_reviews = reviews_all(
        app_id,
        sleep_milliseconds=0,  # no pause between requests
        lang=lang,
        country=country,
        sort=Sort.NEWEST,  # newest reviews first
    )

    # Build the table straight from the fetched review records; each record
    # is expanded into its own columns (presumably one dict per review --
    # confirm against the google_play_scraper documentation).
    reviews_df = pd.DataFrame(fetched_reviews)

    # Save DataFrame to Excel file, without the positional index column.
    reviews_df.to_excel(output_path, index=False)
    print('Google review scrap done')
if __name__ == '__main__':
    # Script entry point: run the App Store scraper first, then Google Play.
    for run_scraper in (apple_app_scraper, google_app_scraper):
        run_scraper()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment