-
-
Save tech234a/9ee4ee9489f4ca1a03e3c7e144db7f91 to your computer and use it in GitHub Desktop.
Chrome Web Store Discovery Script - single region, item type, and language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Chrome Web Store Discovery Script
# By tech234a, October 2020
# This script works by accessing the "infiniteWall" of items that appears after scrolling past all of the collections within
# the homepage for an item type. Even infinity comes to an end.
# CONFIGURATION

# Item type to enumerate.
# Possible values: extensions, themes, apps, app/3-games, collection/[collection name]
CATEGORY = 'extensions'

# Language code, sent as the "hl" request parameter.
# Possible values from docs: https://developer.chrome.com/webstore/i18n#localeTable
# "ar", "am", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "en_GB", "en_US", "es", "es_419", "et", "fa", "fi", "fil", "fr", "gu",
# "he", "hi", "hr", "hu", "id", "it", "ja", "kn", "ko", "lt", "lv", "ml", "mr", "ms", "nl", "no", "pl", "pt_BR", "pt_PT", "ro", "ru",
# "sk", "sl", "sr", "sv", "sw", "ta", "te", "th", "tr", "uk", "vi", "zh_CN", "zh_TW"
LANGUAGE = 'en'

# Country code (2 letters), sent as the "gl" request parameter.
# Possible values from Chrome Developer Dashboard:
# "AR", "AU", "AT", "BE", "BR", "BG", "CA", "CL", "CN", "CO", "CU", "CZ", "DK", "EC", "EG", "EE", "FI", "FR", "DE", "GR", "HK", "HU",
# "IN", "ID", "IE", "IL", "IT", "JP", "LT", "MY", "MX", "MA", "NL", "NZ", "NO", "PA", "PE", "PH", "PL", "PT", "RO", "RU", "SA", "SG",
# "SK", "ZA", "ES", "SE", "CH", "TW", "TH", "TR", "AE", "UA", "GB", "US", "VE", "VN"
# Also: "001" for "worldwide", which refers to items that are listed in all countries so is actually the smallest subset of items
COUNTRYCODE = '001'

# Number of items to get per page: max 210, but the UI normally requests 96.
# A page that comes back with a different item count is treated as the last page.
CNTPAGE = 210
# SCRIPT
from requests import session
from json import loads

# Endpoint queried for every page of the item wall.
ITEM_URL = 'https://chrome.google.com/webstore/ajax/item'


def fetch_page(http, token=None):
    """Request one page of the "infiniteWall" and return (items, next_token).

    http is the requests session used to send the POST. token is the
    pagination token taken from the previous page, or None for the very
    first request, in which case no 'token' parameter is sent.

    Raises requests.HTTPError when the server answers with an error status,
    instead of failing later on an unparseable body.
    """
    params = [
        ('hl', LANGUAGE),
        ('gl', COUNTRYCODE),
        ('pv', '20201016'),
        ('requestedCounts', 'infiniteWall:' + str(CNTPAGE) + ':0:true'),
    ]
    if token is not None:
        # this pagination token isn't random, but it's provided in the previous request so I'm using it anyway
        params.append(('token', token))
    params.append(('category', CATEGORY))

    response = http.post(ITEM_URL, params=params)
    response.raise_for_status()
    # The first 6 characters of the body are skipped before JSON decoding
    # (presumably a non-JSON safety prefix added by the server; verify).
    parsed = loads(response.text[6:])
    return parsed[1][1], parsed[1][4]


# Named "http" so the session object does not shadow the requests library name.
http = session()
discoveredids = set()
totalreqs = 0
token = None

while True:
    totalreqs += 1
    print("Total discovered:", len(discoveredids))
    print("Request number:", totalreqs)
    items, token = fetch_page(http, token)
    # there's actually a lot of other metadata that can be collected here if interested
    # including title, short description, author, price, average rating, category, icon URL,
    # and more, but for now this script just gets the ID
    discoveredids.update(item[0] for item in items)
    # A page with anything other than the requested count is the last one.
    if len(items) != CNTPAGE:
        break

print("FINAL NUMBER OF DISCOVERED ITEMS:", len(discoveredids))

# Categories such as "app/3-games" or "collection/..." contain a slash, which
# would otherwise be read as a directory separator in the output file name.
outname = "out_" + CATEGORY.replace("/", "_") + "_" + LANGUAGE + "_" + COUNTRYCODE + ".txt"
with open(outname, "w") as outfile:
    outfile.write("\n".join(sorted(discoveredids)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment