Created
August 31, 2020 08:19
-
-
Save cod3rboy/0b0e89434f0abb9f8ddc8290cf4f00f7 to your computer and use it in GitHub Desktop.
A Python script to scrape an Android application's info from the Play Store.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape basic information about an Android application from its Play Store
# details page and print it to stdout.
#
# The script prompts for a package name, downloads the details page and looks
# up each field by tag name plus CSS class / ``jsrenderer`` / ``jsname``
# attribute. NOTE(review): those selectors presumably mirror the Play Store
# markup at the time the script was written -- confirm against the live page.
# Any field whose markup cannot be located is printed as "N/A" rather than
# aborting the whole run with an AttributeError.
import requests
from bs4 import BeautifulSoup

PLAY_STORE_DETAILS_URL = "https://play.google.com/store/apps/details"
# Without a timeout requests.get() may block indefinitely on a stalled server.
REQUEST_TIMEOUT_SECONDS = 10
NOT_AVAILABLE = "N/A"


def _find_chain(node, *steps):
    """Apply a sequence of ``find`` lookups, each inside the previous match.

    Each step is a ``(tag_name, find_kwargs)`` pair that is forwarded to
    ``node.find(tag_name, **find_kwargs)``.

    Returns the final match, or ``None`` as soon as *node* or any
    intermediate lookup is ``None``.
    """
    for tag_name, find_kwargs in steps:
        if node is None:
            return None
        node = node.find(tag_name, **find_kwargs)
    return node


def _text_of(node):
    """Return ``node.text``, or the "N/A" placeholder when *node* is None."""
    return NOT_AVAILABLE if node is None else node.text


def _inner_html_of(node):
    """Return the inner markup of *node* as ``str``, or "N/A" when None.

    ``decode_contents()`` is used instead of ``encode_contents()`` because the
    latter returns ``bytes``, which ``print`` would show as a ``b'...'`` repr.
    """
    return NOT_AVAILABLE if node is None else node.decode_contents()


def _describe_developer_detail(detail):
    """Format one entry of the "Developer" block.

    Entries containing an ``<a>`` whose ``href`` starts with ``http`` are
    rendered as ``"<link text> - <href>"``; everything else (plain text,
    anchors without an ``href``, non-http links) falls back to the entry text.
    """
    link_element = detail.find('a')
    href = link_element.get('href') if link_element is not None else None
    if href and href.startswith('http'):
        return link_element.text + " - " + href
    return detail.text


app_package_name = input("Enter android app package name : ").strip()

try:
    # Passing the id through ``params`` lets requests URL-encode it.
    page = requests.get(
        PLAY_STORE_DETAILS_URL,
        params={'id': app_package_name},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # An unknown package yields an error status; stop before parsing that page.
    page.raise_for_status()
except requests.RequestException as error:
    raise SystemExit(
        "Could not fetch Play Store page for '" + app_package_name + "': " + str(error)
    )

soup = BeautifulSoup(page.content, 'html.parser')

# App Name
print("App Name:", _text_of(soup.find('h1', class_='AHFaub')))

# Developer Name
dev_name_container = _find_chain(
    soup,
    ('div', {'class_': 'ZVWMWc'}),
    ('div', {'class_': 'qQKdcc'}),
    ('span', {}),
)
print("Developer Name:", _text_of(dev_name_container))

# App Category (the <span> sibling that follows the developer name)
app_category_container = None
if dev_name_container is not None:
    app_category_container = dev_name_container.find_next_sibling('span')
print("App Category :", _text_of(app_category_container))

# App Icon URL
app_icon_element = _find_chain(soup, ('div', {'class_': 'xSyT2c'}), ('img', {}))
if app_icon_element is None:
    app_icon_url = NOT_AVAILABLE
else:
    app_icon_url = app_icon_element.get('src')
print("App Icon URL:", app_icon_url)

# Total Ratings
print("Total Ratings :", _text_of(soup.find('span', class_='AYi5wd')))

# App Description (inner markup, not just text)
description_container = soup.find('div', attrs={'jsname': 'sngebd'})
print("App Description :", _inner_html_of(description_container))

# Average Store Rating
store_rating_container = soup.find('div', class_='BHMmbe')
if store_rating_container is None:
    # Skip the '/5' suffix so the output does not read "N/A/5".
    print("Average Store Rating : ", NOT_AVAILABLE, sep='')
else:
    print("Average Store Rating : ", store_rating_container.text, '/5', sep='')

# Whats New Section (inner markup, not just text)
whats_new_container = _find_chain(
    soup,
    ('c-wiz', {'attrs': {'jsrenderer': 'eG38Ge'}}),
    ('div', {'class_': 'DWPxHb'}),
    ('span', {}),
)
print("What's New Section :", _inner_html_of(whats_new_container))

# Additional Information Section
additional_info_container = _find_chain(
    soup,
    ('c-wiz', {'attrs': {'jsrenderer': 'HEOg8'}}),
    ('div', {'class_': 'IxB2fe'}),
)
if additional_info_container is None:
    additional_info_list = []
else:
    additional_info_list = additional_info_container.find_all(
        'div', class_='hAyfc', recursive=False
    )

for info in additional_info_list:
    info_name_container = info.find('div', class_='BgcNfc', recursive=False)
    if info_name_container is None:
        # An entry without a label cannot be reported meaningfully.
        continue
    info_name = info_name_container.text
    info_value_container = info.find('span', class_='htlgb', recursive=False)

    normalized_name = info_name.lower()
    if normalized_name == 'content rating':
        # Only the first <div> inside the nested span is printed.
        info_value = _text_of(
            _find_chain(
                info_value_container,
                ('span', {'class_': 'htlgb'}),
                ('div', {}),
            )
        )
    elif normalized_name == 'developer':
        details_holder = _find_chain(
            info_value_container, ('span', {'class_': 'htlgb'})
        )
        if details_holder is None:
            dev_details_list = []
        else:
            dev_details_list = details_holder.find_all('div')
        # Leading newline puts the multi-line details below the label.
        info_value = '\n' + '\n'.join(
            _describe_developer_detail(dev_detail) for dev_detail in dev_details_list
        )
    else:
        info_value = _text_of(info_value_container)

    print(info_name, ":", info_value)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment