#!/usr/bin/env python3
# usage: aoty [year]
#
# This script collects all the albums of the year from Alf's awesome
# AOTY site http://apps.hubmed.org/aoty and prints out the albums
# that appear on more than one Album of the Year list.
#
# You'll need beautifulsoup4 and requests to run this.
import datetime
import sys
from collections import Counter
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Root of the AOTY site; the listing for a year lives at <root><year>/.
AOTY_BASE_URL = 'http://apps.hubmed.org/aoty/'

# Seconds to wait on the server. requests applies no timeout by default,
# so without this a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 30


def iter_album_names(doc):
    """Yield a 'band - album' string for each album entry in *doc*.

    *doc* is a parsed BeautifulSoup page. An album entry is an <li> whose
    itemtype attribute is http://schema.org/MusicAlbum; the stripped text
    of its two links is read as (band, album). Entries that do not contain
    exactly two links are reported on stderr and skipped rather than
    aborting the whole run.
    """
    for li in doc.find_all('li'):
        if li.get('itemtype') != 'http://schema.org/MusicAlbum':
            continue
        links = [a.text.strip() for a in li.find_all('a')]
        if len(links) != 2:
            print('skipping album entry with {} links'.format(len(links)),
                  file=sys.stderr)
            continue
        band, album = links
        yield '{} - {}'.format(band, album)


def count_albums(year):
    """Return a Counter of 'band - album' names over all pages for *year*.

    Starts at the year's first page and follows the a[rel="next"] link
    until a page has none. *year* is the year as a string.

    Raises requests.HTTPError when a page answers with an error status,
    and requests.Timeout when the server does not respond in time.
    """
    counter = Counter()
    url = AOTY_BASE_URL + year + '/'
    visited = set()
    # The visited set stops a "next" link that points back at an earlier
    # page from looping forever.
    while url and url not in visited:
        visited.add(url)
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Fail loudly instead of parsing an error page as an empty list.
        response.raise_for_status()
        doc = BeautifulSoup(response.text, features="html.parser")
        counter.update(iter_album_names(doc))
        next_link = doc.select_one('a[rel="next"]')
        href = next_link.get('href') if next_link is not None else None
        # urljoin resolves root-relative, page-relative and absolute hrefs
        # alike against the page they were found on.
        url = urljoin(url, href) if href else None
    return counter


def format_report(counter):
    """Return the report lines for names that were counted more than once.

    Lines are ordered most frequent first (Counter.most_common order) and
    formatted as the count right-aligned in two columns, a space, then the
    name.
    """
    return ['{: >2} {}'.format(count, name)
            for name, count in counter.most_common()
            if count > 1]


def main(argv=None):
    """Print every album that appears on more than one list.

    The year is argv[1] when given, otherwise the current year. *argv*
    defaults to sys.argv.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) > 1:
        year = argv[1]
    else:
        year = str(datetime.date.today().year)
    for line in format_report(count_albums(year)):
        print(line)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment