Skip to content

Instantly share code, notes, and snippets.

@edsu
Last active December 14, 2019 14:36
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edsu/d2a73931fdc80bf92b5533162b59dd35 to your computer and use it in GitHub Desktop.
Save edsu/d2a73931fdc80bf92b5533162b59dd35 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# usage: aoty [year]
#
# This script collects all the albums listed for a given year on Alf's
# awesome AOTY site (http://apps.hubmed.org/aoty) and prints out the
# albums that appear on more than one Album of the Year list.
#
# You'll need beautifulsoup4 and requests to run this.
import datetime
import sys
from collections import Counter
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Year to report on: the first command-line argument if given, otherwise
# the current calendar year.
if len(sys.argv) > 1:
    year = sys.argv[1]
else:
    year = str(datetime.date.today().year)

# Maps "band - album" to the number of list entries that mention it.
counter = Counter()

url = 'http://apps.hubmed.org/aoty/' + year + '/'

# Pages already fetched, so a rel="next" link that points back at an
# earlier page cannot make the loop run forever.
seen_urls = set()

while url not in seen_urls:
    seen_urls.add(url)

    # The timeout keeps a stalled server from hanging the script, and
    # raise_for_status() stops an HTTP error page from being parsed as if
    # it were a (possibly empty) results page.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        sys.exit('aoty: could not fetch {}: {}'.format(url, err))

    doc = BeautifulSoup(response.text, features="html.parser")

    for li in doc.find_all('li'):
        if li.get('itemtype') != 'http://schema.org/MusicAlbum':
            continue

        # The band and album names are taken from the item's links. An
        # item without exactly two links cannot be split into that pair,
        # so report it and move on instead of crashing on the unpacking.
        names = [a.text.strip() for a in li.find_all('a')]
        if len(names) != 2:
            print(
                'aoty: skipping album entry with {} link(s) on {}'.format(
                    len(names), url),
                file=sys.stderr,
            )
            continue

        band, album = names
        counter['{} - {}'.format(band, album)] += 1

    # Follow the pagination link, if the page has one.
    next_links = doc.select('a[rel="next"]')
    if not next_links:
        break
    href = next_links[0].get('href')
    if not href:
        break
    # urljoin resolves the link against the current page, so absolute-path,
    # relative and fully-qualified hrefs all produce a valid URL.
    url = urljoin(url, href)

# Print only the albums that were counted more than once, most frequent
# first.
for name, count in counter.most_common():
    if count > 1:
        print('{: >2} {}'.format(count, name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment