edsu/aoty

## aoty
#!/usr/bin/env python3

# usage: aoty [year]
#
# This script collects all the albums of the year from Alf's awesome
# AOTY site, http://apps.hubmed.org/aoty, and prints out the albums
# that appear on more than one Album of the Year list.
#
# You'll need beautifulsoup4 and requests to run this.

import datetime
import sys
from collections import Counter
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# The year comes from the first command-line argument; default to the
# current calendar year when none is given.
if len(sys.argv) > 1:
    year = sys.argv[1]
else:
    year = str(datetime.date.today().year)

# Maps "band - album" to the number of times that album was listed.
counter = Counter()

# Pages already fetched, so a "next" link that points back to an earlier
# page cannot trap the script in an endless loop.
seen = set()

url = 'http://apps.hubmed.org/aoty/' + year + '/'
while url not in seen:
    seen.add(url)

    # Without a timeout requests waits indefinitely on a stalled connection,
    # and without raise_for_status() an HTTP error page would be parsed as
    # if it were an empty list page, silently truncating the results.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        sys.exit('aoty: unable to fetch {}: {}'.format(url, e))

    doc = BeautifulSoup(response.text, features="html.parser")
    for li in doc.find_all('li'):
        if li.get('itemtype') != 'http://schema.org/MusicAlbum':
            continue
        names = [a.text.strip() for a in li.find_all('a')]
        if len(names) != 2:
            # An entry is expected to carry exactly a band link and an
            # album link; report anything else instead of crashing on the
            # tuple unpacking below.
            print('aoty: skipping entry with {} link(s) on {}'.format(
                len(names), url), file=sys.stderr)
            continue
        band, album = names
        counter['{} - {}'.format(band, album)] += 1

    # Follow the pagination link, if any. urljoin resolves site-relative,
    # page-relative and absolute hrefs alike.
    next_link = doc.select_one('a[rel="next"]')
    if next_link is None or not next_link.get('href'):
        break
    url = urljoin(url, next_link['href'])

# Only albums that were listed more than once are reported, most frequent
# first.
for name, count in counter.most_common():
    if count > 1:
        print('{: >2} {}'.format(count, name))
	#!/usr/bin/env python3

	# usage: aoty [year]
	#
	# This script collects all the albums of the year from Alf's awesome
	# AOTY site, http://apps.hubmed.org/aoty, and prints out the albums
	# that appear on more than one Album of the Year list.
	#
	# You'll need beautifulsoup4 and requests to run this.

	import sys
	import datetime
	import requests

	from bs4 import BeautifulSoup
	from collections import Counter

	# The year comes from the first command-line argument; default to the
	# current calendar year when none is given.
	if len(sys.argv) > 1:
		year = sys.argv[1]
	else:
		year = str(datetime.date.today().year)

	# Maps "band - album" to the number of times that album was listed.
	counter = Counter()

	# Pages already fetched, so a "next" link that points back to an earlier
	# page cannot trap the script in an endless loop.
	seen = set()

	url = 'http://apps.hubmed.org/aoty/' + year + '/'
	while url not in seen:
		seen.add(url)

		# Without a timeout requests waits indefinitely on a stalled
		# connection, and without raise_for_status() an HTTP error page would
		# be parsed as if it were an empty list page, silently truncating the
		# results.
		try:
			response = requests.get(url, timeout=30)
			response.raise_for_status()
		except requests.RequestException as e:
			sys.exit('aoty: unable to fetch {}: {}'.format(url, e))

		doc = BeautifulSoup(response.text, features="html.parser")
		for li in doc.find_all('li'):
			if li.get('itemtype') != 'http://schema.org/MusicAlbum':
				continue
			names = [a.text.strip() for a in li.find_all('a')]
			if len(names) != 2:
				# An entry is expected to carry exactly a band link and an
				# album link; report anything else instead of crashing on
				# the tuple unpacking below.
				print('aoty: skipping entry with {} link(s) on {}'.format(
					len(names), url), file=sys.stderr)
				continue
			band, album = names
			counter['{} - {}'.format(band, album)] += 1

		# Follow the pagination link, if any. urljoin resolves site-relative,
		# page-relative and absolute hrefs alike.
		next_link = doc.select_one('a[rel="next"]')
		if next_link is None or not next_link.get('href'):
			break
		url = urljoin(url, next_link['href'])

	# Only albums that were listed more than once are reported, most
	# frequent first.
	for name, count in counter.most_common():
		if count > 1:
			print('{: >2} {}'.format(count, name))