tech234a/infinitewall_all.py Secret

## infinitewall_all.py
# Chrome Web Store Discovery Script
# By tech234a, October 2020

# This script works by accessing the "infiniteWall" of items that appears after scrolling past all of the collections within
# the homepage for an item type. Even infinity comes to an end.

# CONFIGURATION

# Item types to crawl; every entry is requested for every country code.
# Accepted values: extensions, themes, apps, app/3-games, collection/[collection name]
CATEGORIES = [
    "extensions",
    "themes",
    "apps",
    "app/3-games",
]

# Language code sent with each request.
# Possible values from docs: https://developer.chrome.com/webstore/i18n#localeTable
# "ar", "am", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "en_GB", "en_US", "es", "es_419", "et", "fa", "fi", "fil", "fr", "gu",
# "he", "hi", "hr", "hu", "id", "it", "ja", "kn", "ko", "lt", "lv", "ml", "mr", "ms", "nl", "no", "pl", "pt_BR", "pt_PT", "ro", "ru",
# "sk", "sl", "sr", "sv", "sw", "ta", "te", "th", "tr", "uk", "vi", "zh_CN", "zh_TW"
LANGUAGE = "en"

# Country codes to crawl (2 letters), as listed in the Chrome Developer Dashboard.
# The final entry, "001", means "worldwide": items that are listed in all countries,
# so it is actually the smallest subset of items.
COUNTRYCODES = [
    "AR", "AU", "AT", "BE", "BR", "BG", "CA", "CL", "CN", "CO",
    "CU", "CZ", "DK", "EC", "EG", "EE", "FI", "FR", "DE", "GR",
    "HK", "HU", "IN", "ID", "IE", "IL", "IT", "JP", "LT", "MY",
    "MX", "MA", "NL", "NZ", "NO", "PA", "PE", "PH", "PL", "PT",
    "RO", "RU", "SA", "SG", "SK", "ZA", "ES", "SE", "CH", "TW",
    "TH", "TR", "AE", "UA", "GB", "US", "VE", "VN", "001",
]

# Items requested per page: the maximum is 210, although the web UI normally asks for 96.
CNTPAGE = 210

# SCRIPT

from requests import session
from json import loads
from threading import Thread
from time import sleep

# One HTTP session shared by every worker thread for all requests.
# NOTE: this binds the name `requests` to a session object, not to the requests module.
requests = session()

# IDs of every item seen so far; all worker threads add to this single set.
discoveredids = set()

from queue import Queue

# Work queue of country codes: filled by the driver code below, drained by doit().
jobs = Queue()

def _fetch_page(country, category, token=None):
    """Request one "infiniteWall" page and return ``(items, next_token)``.

    ``country`` is sent as ``gl`` and ``category`` as ``category``. ``token``
    is the pagination token taken from the previous response; it is left out
    of the query for the very first page of a listing.
    """
    params = [
        ('hl', LANGUAGE),
        ('gl', country),
        ('pv', '20201016'),
        ('requestedCounts', 'infiniteWall:' + str(CNTPAGE) + ':0:true'),
    ]
    if token is not None:
        # this pagination token isn't random, but it's provided in the previous request so it is used anyway
        params.append(('token', token))
    params.append(('category', category))

    response = requests.post('https://chrome.google.com/webstore/ajax/item', params=params)
    # The first 6 characters of the body are skipped before JSON decoding
    # (presumably a non-JSON guard prefix -- confirm against a live response).
    parsed = loads(response.text[6:])
    # parsed[1][1] is the list of items on this page, parsed[1][4] the token for the next page.
    return parsed[1][1], parsed[1][4]


def doit():
    """Worker loop: crawl every category for each country code taken from ``jobs``.

    Country codes are pulled from the shared ``jobs`` queue until it is empty.
    For each country and each entry of ``CATEGORIES`` the listing is paged
    through, and the ID of every returned item (``item[0]``) is added to the
    shared ``discoveredids`` set. Paging of a listing stops once a follow-up
    page holds a number of items different from ``CNTPAGE``.

    Network and parsing errors are not caught here; they propagate and end
    the calling thread.
    """
    # Local import keeps this block self-contained (the file only imports Queue).
    from queue import Empty

    while True:
        # get_nowait() instead of "while not jobs.empty(): jobs.get()": with
        # several workers, another thread can take the last job between the
        # empty() check and a blocking get(), which would then wait forever.
        try:
            country = jobs.get_nowait()
        except Empty:
            return

        for category in CATEGORIES:
            token = None  # no token on the initial request
            totalreqs = 0

            while True:
                totalreqs += 1
                print("Total discovered:", len(discoveredids))
                print("Request number:", totalreqs)

                items, token = _fetch_page(country, category, token)

                for item in items:
                    # there's actually a lot of other metadata that can be collected here if interested
                    # including title, short description, author, price, average rating, category, icon URL,
                    # and more, but for now this script just gets the ID
                    discoveredids.add(item[0])

                # The short-page stop test is only applied from the second
                # request onwards: at least one token-bearing follow-up request
                # is always made, exactly as before.
                if totalreqs > 1 and len(items) != CNTPAGE:
                    break

# Queue one job per country code for the workers to pull from.
for item in COUNTRYCODES:
    jobs.put(item)

# Start 20 worker threads; each one runs doit() until the job queue is drained.
threads = []
for i in range(20):
    runthread = Thread(target=doit)
    runthread.start()
    threads.append(runthread)

# Wait for every worker to finish (see https://stackoverflow.com/a/11968881).
# The list must not be mutated while it is being iterated: calling
# threads.remove(x) inside this loop makes the iterator skip every other
# thread, so half of the workers were never joined.
for x in threads:
    x.join()

# All workers have been joined at this point, so no fixed "wait 5 minutes"
# safety delay is needed before reading the results.
print("FINAL NUMBER OF DISCOVERED ITEMS:", len(discoveredids))

# Write the sorted IDs, one per line; the context manager flushes and closes the file.
with open("out.txt", "w") as outfile:
    outfile.write("\n".join(sorted(discoveredids)))
	# Chrome Web Store Discovery Script
	# By tech234a, October 2020

	# This script works by accessing the "infiniteWall" of items that appears after scrolling past all of the collections within
	# the homepage for an item type. Even infinity comes to an end.

	# CONFIGURATION

	# Item types to crawl; every entry is requested for every country code.
	# Accepted values: extensions, themes, apps, app/3-games, collection/[collection name]
	CATEGORIES = [
	    "extensions",
	    "themes",
	    "apps",
	    "app/3-games",
	]

	# Language code sent with each request.
	# Possible values from docs: https://developer.chrome.com/webstore/i18n#localeTable
	# "ar", "am", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "en_GB", "en_US", "es", "es_419", "et", "fa", "fi", "fil", "fr", "gu",
	# "he", "hi", "hr", "hu", "id", "it", "ja", "kn", "ko", "lt", "lv", "ml", "mr", "ms", "nl", "no", "pl", "pt_BR", "pt_PT", "ro", "ru",
	# "sk", "sl", "sr", "sv", "sw", "ta", "te", "th", "tr", "uk", "vi", "zh_CN", "zh_TW"
	LANGUAGE = "en"

	# Country codes to crawl (2 letters), as listed in the Chrome Developer Dashboard.
	# The final entry, "001", means "worldwide": items that are listed in all countries,
	# so it is actually the smallest subset of items.
	COUNTRYCODES = [
	    "AR", "AU", "AT", "BE", "BR", "BG", "CA", "CL", "CN", "CO",
	    "CU", "CZ", "DK", "EC", "EG", "EE", "FI", "FR", "DE", "GR",
	    "HK", "HU", "IN", "ID", "IE", "IL", "IT", "JP", "LT", "MY",
	    "MX", "MA", "NL", "NZ", "NO", "PA", "PE", "PH", "PL", "PT",
	    "RO", "RU", "SA", "SG", "SK", "ZA", "ES", "SE", "CH", "TW",
	    "TH", "TR", "AE", "UA", "GB", "US", "VE", "VN", "001",
	]

	# Items requested per page: the maximum is 210, although the web UI normally asks for 96.
	CNTPAGE = 210

	# SCRIPT

	from requests import session
	from json import loads
	from threading import Thread
	from time import sleep

	# One HTTP session shared by every worker thread for all requests.
	# NOTE: this binds the name `requests` to a session object, not to the requests module.
	requests = session()

	# IDs of every item seen so far; all worker threads add to this single set.
	discoveredids = set()

	from queue import Queue

	# Work queue of country codes: filled by the driver code below, drained by doit().
	jobs = Queue()

	def _fetch_page(country, category, token=None):
	    """Request one "infiniteWall" page and return ``(items, next_token)``.

	    ``country`` is sent as ``gl`` and ``category`` as ``category``. ``token``
	    is the pagination token taken from the previous response; it is left out
	    of the query for the very first page of a listing.
	    """
	    params = [
	        ('hl', LANGUAGE),
	        ('gl', country),
	        ('pv', '20201016'),
	        ('requestedCounts', 'infiniteWall:' + str(CNTPAGE) + ':0:true'),
	    ]
	    if token is not None:
	        # this pagination token isn't random, but it's provided in the previous request so it is used anyway
	        params.append(('token', token))
	    params.append(('category', category))

	    response = requests.post('https://chrome.google.com/webstore/ajax/item', params=params)
	    # The first 6 characters of the body are skipped before JSON decoding
	    # (presumably a non-JSON guard prefix -- confirm against a live response).
	    parsed = loads(response.text[6:])
	    # parsed[1][1] is the list of items on this page, parsed[1][4] the token for the next page.
	    return parsed[1][1], parsed[1][4]


	def doit():
	    """Worker loop: crawl every category for each country code taken from ``jobs``.

	    Country codes are pulled from the shared ``jobs`` queue until it is empty.
	    For each country and each entry of ``CATEGORIES`` the listing is paged
	    through, and the ID of every returned item (``item[0]``) is added to the
	    shared ``discoveredids`` set. Paging of a listing stops once a follow-up
	    page holds a number of items different from ``CNTPAGE``.

	    Network and parsing errors are not caught here; they propagate and end
	    the calling thread.
	    """
	    # Local import keeps this block self-contained (the file only imports Queue).
	    from queue import Empty

	    while True:
	        # get_nowait() instead of "while not jobs.empty(): jobs.get()": with
	        # several workers, another thread can take the last job between the
	        # empty() check and a blocking get(), which would then wait forever.
	        try:
	            country = jobs.get_nowait()
	        except Empty:
	            return

	        for category in CATEGORIES:
	            token = None  # no token on the initial request
	            totalreqs = 0

	            while True:
	                totalreqs += 1
	                print("Total discovered:", len(discoveredids))
	                print("Request number:", totalreqs)

	                items, token = _fetch_page(country, category, token)

	                for item in items:
	                    # there's actually a lot of other metadata that can be collected here if interested
	                    # including title, short description, author, price, average rating, category, icon URL,
	                    # and more, but for now this script just gets the ID
	                    discoveredids.add(item[0])

	                # The short-page stop test is only applied from the second
	                # request onwards: at least one token-bearing follow-up request
	                # is always made, exactly as before.
	                if totalreqs > 1 and len(items) != CNTPAGE:
	                    break

	# Queue one job per country code for the workers to pull from.
	for item in COUNTRYCODES:
	    jobs.put(item)

	# Start 20 worker threads; each one runs doit() until the job queue is drained.
	threads = []
	for i in range(20):
	    runthread = Thread(target=doit)
	    runthread.start()
	    threads.append(runthread)

	# Wait for every worker to finish (see https://stackoverflow.com/a/11968881).
	# The list must not be mutated while it is being iterated: calling
	# threads.remove(x) inside this loop makes the iterator skip every other
	# thread, so half of the workers were never joined.
	for x in threads:
	    x.join()

	# All workers have been joined at this point, so no fixed "wait 5 minutes"
	# safety delay is needed before reading the results.
	print("FINAL NUMBER OF DISCOVERED ITEMS:", len(discoveredids))

	# Write the sorted IDs, one per line; the context manager flushes and closes the file.
	with open("out.txt", "w") as outfile:
	    outfile.write("\n".join(sorted(discoveredids)))