Created
December 1, 2017 03:37
-
-
Save lkolbly/8d657411cc50e1e01b164778b6239523 to your computer and use it in GitHub Desktop.
My horrible horrible code for analyzing bechdeltest.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib
import json
import os
import time

import requests
# Root of the TMDB v3 REST API; request paths are appended to it.
TMDB_BASE_URL = "https://api.themoviedb.org/3/"
# API key sent along with every TMDB request. Read from the TMDB_API_KEY
# environment variable when it is set, so the real key does not have to be
# written into this file; otherwise the original placeholder is kept.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "<YOUR KEY HERE>")
# Static table of movie genres as {"id", "name"} records.
# NOTE(review): presumably a snapshot of TMDB's movie genre list - confirm
# against the API if ids ever fail to resolve.
GENRES = {
    "genres": [
        {"id": 28, "name": "Action"},
        {"id": 12, "name": "Adventure"},
        {"id": 16, "name": "Animation"},
        {"id": 35, "name": "Comedy"},
        {"id": 80, "name": "Crime"},
        {"id": 99, "name": "Documentary"},
        {"id": 18, "name": "Drama"},
        {"id": 10751, "name": "Family"},
        {"id": 14, "name": "Fantasy"},
        {"id": 36, "name": "History"},
        {"id": 27, "name": "Horror"},
        {"id": 10402, "name": "Music"},
        {"id": 9648, "name": "Mystery"},
        {"id": 10749, "name": "Romance"},
        {"id": 878, "name": "Science Fiction"},
        {"id": 10770, "name": "TV Movie"},
        {"id": 53, "name": "Thriller"},
        {"id": 10752, "name": "War"},
        {"id": 37, "name": "Western"},
    ]
}

# Lookup table: genre id -> human-readable genre name.
genres = {g["id"]: g["name"] for g in GENRES["genres"]}
# Caching TMDB request wrapper
def makeTmdbApiRequest(command, params=None):
    """Fetch ``command`` from the TMDB API, caching the JSON reply on disk.

    The cache key is the SHA-224 hex digest of the base URL, the command and
    the JSON-encoded caller-supplied ``params``; the API key is not part of
    the key. Replies are stored as ``apicache/<digest>.json``.

    Args:
        command: Path appended to ``TMDB_BASE_URL``, e.g. ``"find/tt0133093"``.
        params: Optional dict of query parameters. It is never modified; the
            API key is added to a copy.

    Returns:
        The decoded JSON reply, read from the cache when an entry exists.

    Raises:
        requests.HTTPError: If the server answers with an error status. Such
            replies are not written to the cache.
    """
    caller_params = {} if params is None else params
    h = hashlib.sha224(
        (TMDB_BASE_URL + command + json.dumps(caller_params)).encode()
    ).hexdigest()
    cache_path = os.path.join("apicache", "%s.json" % h)

    try:
        with open(cache_path, encoding="utf-8") as cache_file:
            return json.load(cache_file)
    except (OSError, ValueError):
        # No cache entry yet, or it is unreadable/corrupt: fetch it again.
        pass

    # The API key is deliberately left out of the log line.
    print("Making request '%s' with params '%s'" % (command, caller_params))
    print(" Hash: '%s'" % h)

    # Add the key to a copy so the caller's dict is left untouched.
    request_params = {**caller_params, "api_key": TMDB_API_KEY}
    r = requests.get(TMDB_BASE_URL + command, params=request_params)
    # Fail loudly instead of caching an error payload as if it were data.
    r.raise_for_status()
    d = r.json()

    # Pause after every real request - presumably crude rate limiting.
    time.sleep(5)

    os.makedirs("apicache", exist_ok=True)
    with open(cache_path, "w", encoding="utf-8") as cache_file:
        json.dump(d, cache_file)
    return d
# Load the Bechdel ratings; each entry is read below via its "imdbid",
# "rating" and "title" keys.
with open("bechdeltest.json") as scores_file:
    bechdelScores = json.load(scores_file)
if bechdelScores:
    print(bechdelScores[0])

# Attach the first matching TMDB movie record (if any) to every entry.
for bechdel in bechdelScores:
    res = makeTmdbApiRequest(
        "find/tt%s" % bechdel["imdbid"],
        params={"external_source": "imdb_id"},
    )
    if res["movie_results"]:
        movie = res["movie_results"][0]
        # .get(): the release date is treated as optional further down, so
        # the progress print must not crash on a record that lacks it.
        print(movie.get("release_date"), movie["original_title"], bechdel["rating"])
        bechdel["movie"] = movie
if bechdelScores:
    print(bechdelScores[-1])

# Flatten to the fields used by the analysis. Entries without a TMDB match or
# without a usable release date are dropped; an empty-string date is rejected
# too, since its year cannot be parsed.
bs = [
    {
        "title": entry["title"],
        "year": int(entry["movie"]["release_date"].split("-")[0]),
        "lang": entry["movie"]["original_language"],
        "vote_avg": entry["movie"]["vote_average"],
        "popularity": entry["movie"]["popularity"],
        "genres": entry["movie"]["genre_ids"],
        "bechdel": int(entry["rating"]),
    }
    for entry in bechdelScores
    if "movie" in entry and entry["movie"].get("release_date")
]
def aggregateBy(objects, keyFn, aggrFn, valueFn=lambda x: x):
    """Group ``objects`` by a key and reduce every group to one value.

    Args:
        objects: Iterable of items to group.
        keyFn: Maps an item to its (hashable) group key.
        aggrFn: Called once per group with the list of that group's values;
            its result becomes the group's entry in the returned dict.
        valueFn: Maps an item to the value collected for its group.
            Defaults to the item itself.

    Returns:
        A dict mapping each key to ``aggrFn(values)``, with keys in the order
        they were first seen.
    """
    grouped = {}
    for o in objects:
        grouped.setdefault(keyFn(o), []).append(valueFn(o))
    return {key: aggrFn(values) for key, values in grouped.items()}
def multiAggregateBy(objects, keysFn, aggrFn, valueFn=lambda x: x):
    """Group ``objects`` under several keys each and reduce every group.

    Works like ``aggregateBy`` except that an item may belong to any number
    of groups: it is collected once for every key yielded by ``keysFn``.

    Args:
        objects: Iterable of items to group.
        keysFn: Maps an item to an iterable of (hashable) group keys. An
            item with no keys contributes to no group.
        aggrFn: Called once per group with the list of that group's values.
        valueFn: Maps an item to the value collected for its groups.
            Defaults to the item itself.

    Returns:
        A dict mapping each key to ``aggrFn(values)``, with keys in the order
        they were first seen.
    """
    grouped = {}
    for o in objects:
        for key in keysFn(o):
            grouped.setdefault(key, []).append(valueFn(o))
    return {key: aggrFn(values) for key, values in grouped.items()}
def avg(items):
    """Return the arithmetic mean of ``items``.

    Accepts any iterable of numbers, including generators.

    Raises:
        ZeroDivisionError: If ``items`` is empty.
    """
    # Materialise once so one-shot iterables can be both summed and counted.
    values = list(items)
    return sum(values) / len(values)
# Average Bechdel rating per release year, printed in first-seen order.
for year, rating in aggregateBy(bs, lambda m: m["year"], avg, lambda v: v["bechdel"]).items():
    print(year, rating)

# Group by genre
genre_avgs = sorted(
    multiAggregateBy(bs, lambda m: m["genres"], avg, lambda v: v["bechdel"]).items(),
    key=lambda x: x[1],
)
for genre, rating in genre_avgs:
    # Fall back to the raw id for genres missing from the lookup table.
    print("%16s" % genres.get(genre, genre), "%.3f" % rating)

# Group by... both?
# Width in years of each bucket; 1 keeps every year separate.
YEAR_BUCKET_SIZE = 1
byYear = {}
for bechdelScore in bs:
    year = (bechdelScore["year"] // YEAR_BUCKET_SIZE) * YEAR_BUCKET_SIZE
    byYear.setdefault(year, []).append(bechdelScore)

# Per bucket: spread between the best- and worst-scoring genre averages.
for year, scores in sorted(byYear.items(), key=lambda item: item[0]):
    genre_avgs = list(multiAggregateBy(scores, lambda m: m["genres"], avg, lambda v: v["bechdel"]).values())
    if not genre_avgs:
        # Every movie in this bucket has an empty genre list; no spread.
        continue
    spread = max(genre_avgs) - min(genre_avgs)
    print(year, spread)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment