Skip to content

Instantly share code, notes, and snippets.

@lewiseason
Created Aug 22, 2016
Embed
What would you like to do?
Hammer the official UK singles chart from a known starting page, following each "next" link, and write out a CSV of artist, title and highest position achieved
import csv
from lxml.html import parse
# Placeholder "position" for a track that has not been recorded yet; it
# compares greater than any integer position read from a page.
inf = float("inf")

# Site root; page paths (such as start_page) are appended to it.
base_uri = "http://www.officialcharts.com"

# Path of the first chart page to process.
start_page = "/charts/singles-chart/20150828/7501/"

# Maps (title, artist) -> lowest position number seen so far for that track.
positions = dict()
def parse_page(page):
    """Parse one chart page and merge its entries into ``positions``.

    The page at ``base_uri + page`` is loaded and every row of the
    ``chart-positions`` table that contains a ``track`` div is read: the
    position from the text of the fourth cell, the title and artist from
    links inside the third cell. For each (title, artist) pair the lowest
    position number seen so far is kept in the module-level ``positions``
    dict.

    Args:
        page: Path of the chart page, appended to ``base_uri``.

    Returns:
        The ``href`` of the first "next chart-date-directions" link on the
        page, or ``None`` when the page has no such link.

    Raises:
        IndexError: If a track row lacks one of the expected text nodes.
        ValueError: If the position text is not an integer.
    """
    document = parse(base_uri + page)

    for entry in document.xpath('//table[@class="chart-positions"]/tr'):
        # Only rows holding a "track" div describe a chart entry; skip the rest.
        if not entry.xpath('./td/div[@class="track"]'):
            continue

        rank = int(entry.xpath('./td[4]/text()')[0])
        song = entry.xpath('./td[3]/div/div[2]/div[1]/a/text()')[0]
        performer = entry.xpath('./td[3]/div/div[2]/div[2]/a/text()')[0]

        # A smaller number is a better placing, so keep the minimum.
        key = (song, performer)
        positions[key] = min(rank, positions.get(key, inf))

    following = document.xpath('//a[@class="next chart-date-directions"]/@href')
    return following[0] if following else None
# Follow the chain of chart pages: parse_page() returns the href of the next
# page, or None once a page has no "next" link, which ends the loop.
page = start_page
count = 0  # number of pages processed; not used further in this script
while page:
    page = parse_page(page)
    count += 1

# Write one row per track: artist, title, best (lowest) position seen.
# On Python 3 the csv module writes str, so the file must be opened in text
# mode with newline='' (the original 'wb' mode raises TypeError there).
# UTF-8 is requested explicitly so non-ASCII titles and artists do not depend
# on the platform's default encoding.
with open('/tmp/chart.csv', 'w', newline='', encoding='utf-8') as csvfile:
    w = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for (title, artist), position in positions.items():
        w.writerow([artist, title, position])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment