Skip to content

Instantly share code, notes, and snippets.

@budidino
Last active March 23, 2021 14:44
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save budidino/4a2f88b6e34d7f161885f0c760642650 to your computer and use it in GitHub Desktop.
Save budidino/4a2f88b6e34d7f161885f0c760642650 to your computer and use it in GitHub Desktop.
get App Store reviews
import xmltodict
import requests
import datetime
import multiprocessing as mp
# Lookup table: two-letter country code -> upper-case country name.
# The keys are the values passed to get_reviews() and substituted into the
# per-country reviews feed URL; the values are only used for display when a
# review is printed.
# NOTE(review): the commented-out entries are presumably territories for which
# the feed reported no App Store storefront (the fetcher prints "NO APP STORE"
# on HTTP 400) -- confirm before re-enabling any of them.
COUNTRY_CODES = {
"AF": "AFGHANISTAN",
# "AX": "ÅLAND ISLANDS",
"AL": "ALBANIA",
"DZ": "ALGERIA",
# "AS": "AMERICAN SAMOA",
"AD": "ANDORRA",
"AO": "ANGOLA",
"AI": "ANGUILLA",
# "AQ": "ANTARCTICA",
"AG": "ANTIGUA AND BARBUDA",
"AR": "ARGENTINA",
"AM": "ARMENIA",
# "AW": "ARUBA",
"AU": "AUSTRALIA",
"AT": "AUSTRIA",
"AZ": "AZERBAIJAN",
"BS": "BAHAMAS",
"BH": "BAHRAIN",
"BD": "BANGLADESH",
"BB": "BARBADOS",
"BY": "BELARUS",
"BE": "BELGIUM",
"BZ": "BELIZE",
"BJ": "BENIN",
"BM": "BERMUDA",
"BT": "BHUTAN",
"BO": "BOLIVIA, PLURINATIONAL STATE OF",
# "BQ": "BONAIRE, SINT EUSTATIUS AND SABA",
"BA": "BOSNIA AND HERZEGOVINA",
"BW": "BOTSWANA",
# "BV": "BOUVET ISLAND",
"BR": "BRAZIL",
# "IO": "BRITISH INDIAN OCEAN TERRITORY",
"BN": "BRUNEI DARUSSALAM",
"BG": "BULGARIA",
"BF": "BURKINA FASO",
# "BI": "BURUNDI",
"KH": "CAMBODIA",
"CM": "CAMEROON",
"CA": "CANADA",
"CV": "CAPE VERDE",
"KY": "CAYMAN ISLANDS",
"CF": "CENTRAL AFRICAN REPUBLIC",
"TD": "CHAD",
"CL": "CHILE",
"CN": "CHINA",
# "CX": "CHRISTMAS ISLAND",
# "CC": "COCOS (KEELING) ISLANDS",
"CO": "COLOMBIA",
# "KM": "COMOROS",
"CG": "CONGO",
"CD": "CONGO, THE DEMOCRATIC REPUBLIC OF THE",
# "CK": "COOK ISLANDS",
"CR": "COSTA RICA",
"CI": "CÔTE D'IVOIRE",
"HR": "CROATIA",
# "CU": "CUBA",
# "CW": "CURAÇAO",
"CY": "CYPRUS",
"CZ": "CZECH REPUBLIC",
"DK": "DENMARK",
# "DJ": "DJIBOUTI",
"DM": "DOMINICA",
"DO": "DOMINICAN REPUBLIC",
"EC": "ECUADOR",
"EG": "EGYPT",
"SV": "EL SALVADOR",
# "GQ": "EQUATORIAL GUINEA",
# "ER": "ERITREA",
"EE": "ESTONIA",
"ET": "ETHIOPIA",
# "FK": "FALKLAND ISLANDS (MALVINAS)",
# "FO": "FAROE ISLANDS",
"FJ": "FIJI",
"FI": "FINLAND",
"FR": "FRANCE",
# "GF": "FRENCH GUIANA",
# "PF": "FRENCH POLYNESIA",
# "TF": "FRENCH SOUTHERN TERRITORIES",
"GA": "GABON",
"GM": "GAMBIA",
"GE": "GEORGIA",
"DE": "GERMANY",
"GH": "GHANA",
# "GI": "GIBRALTAR",
"GR": "GREECE",
# "GL": "GREENLAND",
"GD": "GRENADA",
# "GP": "GUADELOUPE",
# "GU": "GUAM",
"GT": "GUATEMALA",
# "GG": "GUERNSEY",
"GN": "GUINEA",
"GW": "GUINEA-BISSAU",
"GY": "GUYANA",
# "HT": "HAITI",
# "HM": "HEARD ISLAND AND MCDONALD ISLANDS",
# "VA": "HOLY SEE (VATICAN CITY STATE)",
"HN": "HONDURAS",
"HK": "HONG KONG",
"HU": "HUNGARY",
"IS": "ICELAND",
"IN": "INDIA",
"ID": "INDONESIA",
# "IR": "IRAN, ISLAMIC REPUBLIC OF",
"IQ": "IRAQ",
"IE": "IRELAND",
# "IM": "ISLE OF MAN",
"IL": "ISRAEL",
"IT": "ITALY",
"JM": "JAMAICA",
"JP": "JAPAN",
# "JE": "JERSEY",
"JO": "JORDAN",
"KZ": "KAZAKHSTAN",
"KE": "KENYA",
# "KI": "KIRIBATI",
# "KP": "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF",
"KR": "KOREA, REPUBLIC OF",
"KW": "KUWAIT",
"KG": "KYRGYZSTAN",
"LA": "LAO PEOPLE'S DEMOCRATIC REPUBLIC",
"LV": "LATVIA",
"LB": "LEBANON",
# "LS": "LESOTHO",
"LR": "LIBERIA",
"LY": "LIBYA",
"LI": "LIECHTENSTEIN",
"LT": "LITHUANIA",
"LU": "LUXEMBOURG",
"MO": "MACAO",
"MK": "MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF",
"MG": "MADAGASCAR",
"MW": "MALAWI",
"MY": "MALAYSIA",
"MV": "MALDIVES",
"ML": "MALI",
"MT": "MALTA",
# "MH": "MARSHALL ISLANDS",
# "MQ": "MARTINIQUE",
"MR": "MAURITANIA",
"MU": "MAURITIUS",
# "YT": "MAYOTTE",
"MX": "MEXICO",
"FM": "MICRONESIA, FEDERATED STATES OF",
"MD": "MOLDOVA, REPUBLIC OF",
"MC": "MONACO",
"MN": "MONGOLIA",
"ME": "MONTENEGRO",
"MS": "MONTSERRAT",
"MA": "MOROCCO",
"MZ": "MOZAMBIQUE",
"MM": "MYANMAR",
"NA": "NAMIBIA",
"NR": "NAURU",
"NP": "NEPAL",
"NL": "NETHERLANDS",
# "NC": "NEW CALEDONIA",
"NZ": "NEW ZEALAND",
"NI": "NICARAGUA",
"NE": "NIGER",
"NG": "NIGERIA",
# "NU": "NIUE",
# "NF": "NORFOLK ISLAND",
# "MP": "NORTHERN MARIANA ISLANDS",
"NO": "NORWAY",
"OM": "OMAN",
"PK": "PAKISTAN",
"PW": "PALAU",
"PS": "PALESTINE, STATE OF",
"PA": "PANAMA",
"PG": "PAPUA NEW GUINEA",
"PY": "PARAGUAY",
"PE": "PERU",
"PH": "PHILIPPINES",
# "PN": "PITCAIRN",
"PL": "POLAND",
"PT": "PORTUGAL",
# "PR": "PUERTO RICO",
"QA": "QATAR",
# "RE": "RÉUNION",
"RO": "ROMANIA",
"RU": "RUSSIAN FEDERATION",
"RW": "RWANDA",
# "BL": "SAINT BARTHÉLEMY",
# "SH": "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA",
"KN": "SAINT KITTS AND NEVIS",
"LC": "SAINT LUCIA",
# "MF": "SAINT MARTIN (FRENCH PART)",
# "PM": "SAINT PIERRE AND MIQUELON",
"VC": "SAINT VINCENT AND THE GRENADINES",
"WS": "SAMOA",
# "SM": "SAN MARINO",
"ST": "SAO TOME AND PRINCIPE",
"SA": "SAUDI ARABIA",
"SN": "SENEGAL",
"RS": "SERBIA",
"SC": "SEYCHELLES",
"SL": "SIERRA LEONE",
"SG": "SINGAPORE",
# "SX": "SINT MAARTEN (DUTCH PART)",
"SK": "SLOVAKIA",
"SI": "SLOVENIA",
"SB": "SOLOMON ISLANDS",
# "SO": "SOMALIA",
"ZA": "SOUTH AFRICA",
# "GS": "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS",
# "SS": "SOUTH SUDAN",
"ES": "SPAIN",
"LK": "SRI LANKA",
# "SD": "SUDAN",
"SR": "SURINAME",
# "SJ": "SVALBARD AND JAN MAYEN",
"SZ": "SWAZILAND",
"SE": "SWEDEN",
"CH": "SWITZERLAND",
# "SY": "SYRIAN ARAB REPUBLIC",
"TW": "TAIWAN, PROVINCE OF CHINA",
"TJ": "TAJIKISTAN",
"TZ": "TANZANIA, UNITED REPUBLIC OF",
"TH": "THAILAND",
# "TL": "TIMOR-LESTE",
# "TG": "TOGO",
# "TK": "TOKELAU",
"TO": "TONGA",
"TT": "TRINIDAD AND TOBAGO",
"TN": "TUNISIA",
"TR": "TURKEY",
"TM": "TURKMENISTAN",
"TC": "TURKS AND CAICOS ISLANDS",
# "TV": "TUVALU",
"UG": "UGANDA",
"UA": "UKRAINE",
"AE": "UNITED ARAB EMIRATES",
"GB": "UNITED KINGDOM",
"US": "UNITED STATES",
# "UM": "UNITED STATES MINOR OUTLYING ISLANDS",
"UY": "URUGUAY",
"UZ": "UZBEKISTAN",
"VU": "VANUATU",
"VE": "VENEZUELA, BOLIVARIAN REPUBLIC OF",
"VN": "VIET NAM",
"VG": "VIRGIN ISLANDS, BRITISH",
# "VI": "VIRGIN ISLANDS, U.S.",
# "WF": "WALLIS AND FUTUNA",
# "EH": "WESTERN SAHARA",
"YE": "YEMEN",
"ZM": "ZAMBIA",
"ZW": "ZIMBABWE",
}
class Review:
    """One customer review taken from a country's App Store reviews feed.

    The constructor stores its arguments unchanged; no validation or
    conversion happens here.

    Attributes:
        updated: Timestamp of the review (a ``datetime`` when the instance is
            built by ``review_from_entry``); used as the sort key for output.
        country: Country code of the storefront the review was fetched from.
        title: Review headline.
        review: Review body text.
        rating: Star rating exactly as found in the feed; callers convert it
            with ``int()`` when they need a number.
        version: App version the review was written for.
        author: Display name of the reviewer.
    """

    def __init__(self, updated, country, title, review, rating, version, author):
        self.updated = updated
        self.country = country
        self.title = title
        self.review = review
        self.rating = rating
        self.version = version
        self.author = author

    def __repr__(self):
        # Debug aid only: shows every field so a stray instance in a log or
        # traceback is identifiable.
        return (
            f"{type(self).__name__}("
            f"updated={self.updated!r}, country={self.country!r}, "
            f"title={self.title!r}, review={self.review!r}, "
            f"rating={self.rating!r}, version={self.version!r}, "
            f"author={self.author!r})"
        )
def review_from_entry(entry, country) -> Review:
    """Build a ``Review`` from one parsed ``entry`` element of the feed.

    Args:
        entry: Mapping produced by ``xmltodict`` for a single feed entry. It
            must provide the keys ``updated``, ``title``, ``content``,
            ``im:rating``, ``im:version`` and ``author`` (with a ``name``).
        country: Country code of the storefront the entry came from; stored
            on the review as-is.

    Returns:
        The populated ``Review``. ``updated`` is parsed into a ``datetime``.

    Raises:
        KeyError: If one of the required keys is missing from ``entry``.
        ValueError: If ``updated`` is not an ISO-8601 timestamp.
    """
    updated = datetime.datetime.fromisoformat(entry["updated"])

    content = entry["content"]
    # xmltodict yields a list when an element is repeated and a single
    # dict (or a bare string) when it appears once, so do not assume the
    # list shape. The first item is used, exactly as before.
    if isinstance(content, list):
        content = content[0]
    if isinstance(content, dict):
        # An element with attributes but an empty body has no "#text" key.
        body = content.get("#text") or ""
    else:
        body = content or ""

    return Review(
        updated,
        country,
        entry["title"],
        body,
        entry["im:rating"],
        entry["im:version"],
        entry["author"]["name"],
    )
def reviews_for_country_code(app_id, country_code) -> "list[Review]":
    """Download and parse the customer-reviews feed of one storefront.

    Args:
        app_id: App Store id of the app, as used in the feed URL.
        country_code: Two-letter storefront code, as used in the feed URL.

    Returns:
        The reviews found in the feed. The list is empty when the request
        fails, the server answers with a non-200 status, or the feed holds
        no entries; a short diagnostic is printed for the failure cases.
    """
    url = f"https://itunes.apple.com/{country_code}/rss/customerreviews/id={app_id}/xml"
    headers = {'User-Agent': 'Mozilla/5.0', 'Cache-Control': 'no-cache'}

    try:
        # Without a timeout a stalled connection would block this worker
        # (and therefore the whole run) indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as error:
        print(f"REQUEST FAILED: {country_code} ({error})")
        return []

    if response.status_code == 400:
        print(f"NO APP STORE: {country_code}")
        return []
    if response.status_code != 200:
        # Any other error body is not a reviews feed; do not try to parse it.
        print(f"HTTP {response.status_code}: {country_code}")
        return []

    feed = xmltodict.parse(response.content).get("feed") or {}
    entries = feed.get("entry")
    if entries is None:  # feed without any review entries
        return []
    if isinstance(entries, list):  # several reviews
        return [review_from_entry(entry, country_code) for entry in entries]
    if "title" in entries:  # exactly one review: xmltodict gives a bare mapping
        return [review_from_entry(entries, country_code)]
    return []
def get_reviews(app_id, country_codes):
    """Fetch the reviews of every given storefront in parallel.

    One ``reviews_for_country_code`` task is submitted per country code to a
    ``multiprocessing`` pool. Each successful result is handed to
    ``log_reviews``; the function itself returns nothing and blocks until
    all tasks have finished.

    Args:
        app_id: App Store id of the app.
        country_codes: Iterable of storefront codes (iterating a dict yields
            its keys).
    """
    # The context manager guarantees the worker processes are torn down even
    # if submitting a task raises.
    with mp.Pool() as pool:
        for country_code in country_codes:
            pool.apply_async(
                reviews_for_country_code,
                args=(app_id, country_code),
                callback=log_reviews,
                # Without an error callback a failing worker is dropped
                # silently. The default argument pins the current code for
                # the message.
                error_callback=lambda error, code=country_code: print(
                    f"FETCH FAILED: {code} ({error!r})"
                ),
            )
        # close() + join() wait for the queued tasks; leaving the ``with``
        # block straight away would terminate them instead.
        pool.close()
        pool.join()
# Accumulator for every review collected so far; filled by log_reviews().
review_list = []


def log_reviews(reviews):
    """Add a batch of reviews to the module-level ``review_list``.

    Used as the success callback for the per-country fetch tasks.

    Args:
        reviews: Iterable of reviews to append, in order.
    """
    # In-place extend mutates the existing list, so no ``global`` rebinding
    # is needed and other references to the list keep seeing the new items.
    review_list.extend(reviews)
# App Store id of the app whose reviews are collected.
app_id = "428395953"


def average_rating(reviews):
    """Return the mean star rating of ``reviews`` rounded to two decimals.

    Args:
        reviews: Sequence of objects with a ``rating`` attribute that
            ``int()`` can convert.

    Returns:
        The rounded average, or ``None`` when ``reviews`` is empty (the
        average is undefined and dividing would raise ZeroDivisionError).
    """
    if not reviews:
        return None
    total = sum(int(review.rating) for review in reviews)
    return round(total / len(reviews), 2)


if __name__ == '__main__':
    get_reviews(app_id, COUNTRY_CODES)

    # Newest reviews first.
    sorted_reviews = sorted(review_list, key=lambda review: review.updated, reverse=True)
    for review in sorted_reviews:
        print(f"\ndate: {review.updated}\ncountry: {COUNTRY_CODES[review.country]}\nversion: {review.version}\nrating: {review.rating}\nauthor: {review.author}\ntitle: {review.title}\nreview:\n{review.review}\n")

    print(f"REVIEWS: {len(review_list)}")
    average = average_rating(review_list)
    # With no reviews at all there is no average to report.
    print(f"RATING: {average if average is not None else 'n/a'}")
@budidino
Copy link
Author

added parallel fetching (a multiprocessing pool, one task per country) with the second revision

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment