Skip to content

Instantly share code, notes, and snippets.

@cod3rboy
Created August 31, 2020 08:19
Show Gist options
  • Save cod3rboy/0b0e89434f0abb9f8ddc8290cf4f00f7 to your computer and use it in GitHub Desktop.
Save cod3rboy/0b0e89434f0abb9f8ddc8290cf4f00f7 to your computer and use it in GitHub Desktop.
A Python script to scrape an Android application's info from the Play Store.
import requests
from bs4 import BeautifulSoup
# Ask for the package id, download its Play Store details page and parse it.
# Surrounding whitespace is dropped so a pasted id does not corrupt the URL.
app_package_name = input("Enter android app package name : ").strip()
if not app_package_name:
    # Nothing to look up; stop with a readable message instead of requesting
    # a details page with an empty id.
    raise SystemExit("No package name entered.")
app_url = "https://play.google.com/store/apps/details?id=" + app_package_name
# A timeout keeps the script from hanging forever on an unresponsive
# connection; requests waits indefinitely when none is given.
page = requests.get(app_url, timeout=30)
# Fail here with a clear HTTP error (e.g. 404 for an unknown package) rather
# than trying to scrape an error page further down.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')
# Print the headline fields of the app page held in `soup`.
# NOTE(review): the class/jsname/jsrenderer values below presumably match one
# specific Play Store page layout - verify they still exist in the live page.

# Placeholder printed when an expected element is absent from the page.
NOT_AVAILABLE = "N/A"


def _find_path(root, *steps):
    """Follow successive ``find`` lookups starting at *root*.

    Each step is a ``(tag_name, attrs)`` pair. Returns the last element found,
    or ``None`` as soon as *root* or any intermediate lookup is missing, so a
    missing element never raises ``AttributeError``.
    """
    node = root
    for tag_name, attrs in steps:
        if node is None:
            return None
        node = node.find(tag_name, attrs=attrs)
    return node


def _text_of(node):
    """Return the text of *node*, or the placeholder when *node* is None."""
    return node.text if node is not None else NOT_AVAILABLE


def _inner_html_of(node):
    """Return the inner markup of *node* as ``str`` (placeholder if None).

    ``decode_contents`` yields text, whereas ``encode_contents`` yields bytes
    that ``print`` would show as a ``b'...'`` literal.
    """
    return node.decode_contents() if node is not None else NOT_AVAILABLE


# App Name
app_name = _text_of(soup.find('h1', class_='AHFaub'))
print("App Name:", app_name)
# Developer Name
dev_name_container = _find_path(
    soup,
    ('div', {'class': 'ZVWMWc'}),
    ('div', {'class': 'qQKdcc'}),
    ('span', {}),
)
print("Developer Name:", _text_of(dev_name_container))
# App Category (the <span> sibling that follows the developer name)
app_category_container = (
    dev_name_container.find_next_sibling('span')
    if dev_name_container is not None
    else None
)
print("App Category :", _text_of(app_category_container))
# App Icon URL
app_icon_element = _find_path(soup, ('div', {'class': 'xSyT2c'}), ('img', {}))
app_icon_url = app_icon_element.get('src') if app_icon_element is not None else None
print("App Icon URL:", app_icon_url if app_icon_url is not None else NOT_AVAILABLE)
# Total Ratings
total_ratings_container = soup.find('span', class_='AYi5wd')
print("Total Ratings :", _text_of(total_ratings_container))
# App Description
description_container = soup.find('div', attrs={'jsname': 'sngebd'})
print("App Description :", _inner_html_of(description_container))
# Average Store Rating
store_rating_container = soup.find('div', class_='BHMmbe')
if store_rating_container is not None:
    print("Average Store Rating : ", store_rating_container.text, '/5', sep='')
else:
    # Avoid printing a meaningless "N/A/5" when the rating is missing.
    print("Average Store Rating : ", NOT_AVAILABLE, sep='')
# Whats New Section
whats_new_ancestor_container = soup.find('c-wiz', attrs={'jsrenderer': 'eG38Ge'})
whats_new_container = _find_path(
    whats_new_ancestor_container,
    ('div', {'class': 'DWPxHb'}),
    ('span', {}),
)
print("What's New Section :", _inner_html_of(whats_new_container))
# Additional Information Section
# Prints one "name : value" line per row of the additional-information block
# found in `soup`. Missing containers are skipped instead of raising.
additional_info_section = soup.find('c-wiz', attrs={'jsrenderer': 'HEOg8'})
additional_info_container = (
    additional_info_section.find('div', class_='IxB2fe')
    if additional_info_section is not None
    else None
)
additional_info_list = (
    additional_info_container.find_all('div', class_='hAyfc', recursive=False)
    if additional_info_container is not None
    else []
)
for info in additional_info_list:
    info_name_container = info.find('div', class_='BgcNfc', recursive=False)
    info_value_container = info.find('span', class_='htlgb', recursive=False)
    if info_name_container is None or info_value_container is None:
        # Row does not have the expected name/value pair; nothing to print.
        continue
    info_name = info_name_container.text
    info_key = info_name.lower()
    # The value of these two rows sits inside a nested span with the same
    # class; fall back to the outer span when that nesting is absent.
    nested_value_container = info_value_container.find('span', class_='htlgb')
    if info_key == 'content rating':
        # Only the first <div> of the nested span is printed for this row.
        rating_container = (
            nested_value_container.find('div')
            if nested_value_container is not None
            else None
        )
        if rating_container is None:
            rating_container = info_value_container
        info_value = rating_container.text
    elif info_key == 'developer':
        if nested_value_container is not None:
            info_value_container = nested_value_container
        details_list = []
        for dev_detail in info_value_container.find_all('div'):
            link_element = dev_detail.find('a')
            # An <a> without an href yields None here; treat it as plain text
            # instead of calling .startswith on None.
            href = link_element.get('href') if link_element is not None else None
            if href and href.startswith('http'):
                details_list.append(link_element.text + " - " + href)
            else:
                details_list.append(dev_detail.text)
        # Leading newline puts the developer details on their own lines.
        info_value = '\n' + '\n'.join(details_list)
    else:
        info_value = info_value_container.text
    print(info_name, ":", info_value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment