Skip to content

Instantly share code, notes, and snippets.

@lkolbly
Created December 1, 2017 03:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lkolbly/8d657411cc50e1e01b164778b6239523 to your computer and use it in GitHub Desktop.
Save lkolbly/8d657411cc50e1e01b164778b6239523 to your computer and use it in GitHub Desktop.
My horrible horrible code for analyzing bechdeltest.com
import hashlib
import json
import os
import time

import requests
# Root of the TMDB v3 REST API; endpoint paths are appended directly to it.
TMDB_BASE_URL = "https://api.themoviedb.org/3/"

# TMDB API key. The TMDB_API_KEY environment variable takes precedence so the
# key does not have to be written into this file; the placeholder below is
# the fallback and must be replaced when the variable is not set.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "<YOUR KEY HERE>")

# Hard-coded table of genre ids and names, wrapped in a top-level "genres"
# key. NOTE(review): presumably copied from a TMDB genre-list response --
# confirm it is still current.
GENRES = {
    "genres": [
        {"id": 28, "name": "Action"},
        {"id": 12, "name": "Adventure"},
        {"id": 16, "name": "Animation"},
        {"id": 35, "name": "Comedy"},
        {"id": 80, "name": "Crime"},
        {"id": 99, "name": "Documentary"},
        {"id": 18, "name": "Drama"},
        {"id": 10751, "name": "Family"},
        {"id": 14, "name": "Fantasy"},
        {"id": 36, "name": "History"},
        {"id": 27, "name": "Horror"},
        {"id": 10402, "name": "Music"},
        {"id": 9648, "name": "Mystery"},
        {"id": 10749, "name": "Romance"},
        {"id": 878, "name": "Science Fiction"},
        {"id": 10770, "name": "TV Movie"},
        {"id": 53, "name": "Thriller"},
        {"id": 10752, "name": "War"},
        {"id": 37, "name": "Western"},
    ]
}

# Flat lookup from genre id to genre name, used when printing per-genre
# results.
genres = {g["id"]: g["name"] for g in GENRES["genres"]}
# Caching TMDB request wrapper
def makeTmdbApiRequest(command, params=None):
    """Fetch a TMDB endpoint as parsed JSON, caching responses on disk.

    Responses are stored in ``apicache/<hash>.json``. The hash is the SHA-224
    of the base URL, the command and the JSON-encoded ``params`` *without*
    the API key, so cached entries do not depend on which key was used.

    Args:
        command: Endpoint path appended to ``TMDB_BASE_URL``.
        params: Optional dict of query parameters. It is never modified.

    Returns:
        The decoded JSON body, read from the cache when an entry exists.

    Raises:
        requests.HTTPError: If TMDB answers with an error status.
        OSError: If the cache entry cannot be written.
    """
    # Work on a copy: adding the API key to the caller's dict (or to a shared
    # default) would leak the key and change the hash of later calls.
    query = dict(params) if params is not None else {}
    cache_key = hashlib.sha224(
        (TMDB_BASE_URL + command + json.dumps(query)).encode()
    ).hexdigest()
    cache_path = "apicache/%s.json" % cache_key

    try:
        with open(cache_path) as cached:
            return json.load(cached)
    except (OSError, ValueError):
        # Missing, unreadable or corrupt cache entry: fetch it again below.
        pass

    query["api_key"] = TMDB_API_KEY
    print("Making request '%s' with params '%s'" % (command, query))
    print(" Hash: '%s'" % cache_key)
    response = requests.get(TMDB_BASE_URL + command, params=query)
    # Fail before caching, otherwise an error payload (bad key, rate limit)
    # would be stored and returned for this request forever.
    response.raise_for_status()
    data = response.json()
    # Pause after every real request to stay under the API rate limit.
    time.sleep(5)
    os.makedirs("apicache", exist_ok=True)
    with open(cache_path, "w") as cached:
        cached.write(json.dumps(data))
    return data
# Load the bechdeltest.com dump. The code below reads the "imdbid", "title"
# and "rating" keys of every entry.
with open("bechdeltest.json") as scoresFile:
    bechdelScores = json.load(scoresFile)
if bechdelScores:
    print(bechdelScores[0])

# Look every movie up on TMDB by IMDb id and attach the first match, if any,
# to the entry under "movie".
for bechdel in bechdelScores:
    res = makeTmdbApiRequest(
        "find/tt%s" % bechdel["imdbid"], params={"external_source": "imdb_id"}
    )
    if res["movie_results"]:
        movie = res["movie_results"][0]
        print(movie["release_date"], movie["original_title"], bechdel["rating"])
        bechdel["movie"] = movie
if bechdelScores:
    print(bechdelScores[-1])

# Flatten matched entries into the records the analysis works on. Entries
# without a TMDB match are dropped, and so are entries whose release date is
# missing, None or the empty string: int("") would raise ValueError when the
# year is extracted.
bs = [
    {
        "title": entry["title"],
        "year": int(entry["movie"]["release_date"].split("-")[0]),
        "lang": entry["movie"]["original_language"],
        "vote_avg": entry["movie"]["vote_average"],
        "popularity": entry["movie"]["popularity"],
        "genres": entry["movie"]["genre_ids"],
        "bechdel": int(entry["rating"]),
    }
    for entry in bechdelScores
    if "movie" in entry and entry["movie"].get("release_date")
]
def aggregateBy(objects, keyFn, aggrFn, valueFn=lambda x: x):
    """Group ``objects`` by a key and reduce each group to a single value.

    Args:
        objects: Iterable of items to group.
        keyFn: Called on each item; returns its (hashable) group key.
        aggrFn: Called once per group with the list of that group's values,
            in the order the items were encountered.
        valueFn: Called on each item to produce the value stored in its
            group. Defaults to the item itself.

    Returns:
        A dict mapping every key to ``aggrFn`` of that key's values. Empty
        input yields an empty dict.
    """
    groups = {}
    for obj in objects:
        groups.setdefault(keyFn(obj), []).append(valueFn(obj))
    return {key: aggrFn(values) for key, values in groups.items()}
def multiAggregateBy(objects, keysFn, aggrFn, valueFn=lambda x: x):
    """Group ``objects`` under several keys each and reduce every group.

    Works like ``aggregateBy`` except that an item can belong to any number
    of groups: it is added once for every key ``keysFn`` yields for it, and
    not at all when ``keysFn`` yields nothing.

    Args:
        objects: Iterable of items to group.
        keysFn: Called on each item; returns an iterable of (hashable) keys.
        aggrFn: Called once per group with the list of that group's values,
            in the order the items were encountered.
        valueFn: Called on the item, once per key, to produce the value
            stored in that group. Defaults to the item itself.

    Returns:
        A dict mapping every key to ``aggrFn`` of that key's values. Empty
        input yields an empty dict.
    """
    groups = {}
    for obj in objects:
        for key in keysFn(obj):
            groups.setdefault(key, []).append(valueFn(obj))
    return {key: aggrFn(values) for key, values in groups.items()}
def avg(items):
    """Return the arithmetic mean of ``items``.

    Args:
        items: Any iterable of numbers. Iterables without a length, such as
            generators, are accepted because the values are collected first.

    Returns:
        The sum of the values divided by their count.

    Raises:
        ZeroDivisionError: If ``items`` is empty.
    """
    values = list(items)
    return sum(values) / len(values)
# Average Bechdel rating per release year.
for year, rating in aggregateBy(bs, lambda m: m["year"], avg, lambda v: v["bechdel"]).items():
    print(year, rating)

# Group by genre
# A movie counts towards every genre it lists; genres are printed from the
# lowest to the highest average rating.
genre_avgs = sorted(
    multiAggregateBy(bs, lambda m: m["genres"], avg, lambda v: v["bechdel"]).items(),
    key=lambda x: x[1],
)
for genre, rating in genre_avgs:
    # Ids missing from the lookup table are printed as the raw id.
    print("%16s" % genres.get(genre, genre), "%.3f" % rating)

# Group by... both?
# For each year bucket, print the gap between the best- and worst-scoring
# genre of that bucket. A bucket size of 1 means one bucket per year; 10
# would group by decade.
YEAR_BUCKET_SIZE = 1
byYear = {}
for bechdelScore in bs:
    year = (bechdelScore["year"] // YEAR_BUCKET_SIZE) * YEAR_BUCKET_SIZE
    byYear.setdefault(year, []).append(bechdelScore)

for year, scores in sorted(byYear.items(), key=lambda item: item[0]):
    genre_avgs = list(
        multiAggregateBy(scores, lambda m: m["genres"], avg, lambda v: v["bechdel"]).values()
    )
    if not genre_avgs:
        # No movie in this bucket lists a genre; max()/min() would raise
        # ValueError on the empty list, so there is no spread to report.
        continue
    spread = max(genre_avgs) - min(genre_avgs)
    print(year, spread)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment