# Billboard year-end chart scraper.
# Source: GitHub gist imdkm/044b9f2fdda3bdb78ab0c77515698078 by @imdkm,
# created June 20, 2017.
import csv
import os
from time import sleep

import requests
from bs4 import BeautifulSoup
# Header row written at the top of every per-year CSV file.
CHART_HEADER = ["rank", "artist name", "song title"]
# Directory the per-year CSV files are written to (created on demand).
OUTPUT_DIR = "./csv/"
# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 30
# Seconds to pause after each successfully scraped year.
CRAWL_DELAY = 30


def rows_from_cells(cells):
    """Group a flat sequence of table-cell texts into chart rows.

    The cells are consumed three at a time as (rank, artist name, song
    title).  Commas in the artist and title are replaced with underscores,
    as the original script did to keep them from being mistaken for
    separators.  (csv.writer would quote such fields on its own; the
    substitution is kept so the output stays comparable with files produced
    by earlier runs.)

    Args:
        cells: sequence of strings, one per ``<td>`` element, in page order.

    Returns:
        A list of rows; the first row is a copy of ``CHART_HEADER``.  A
        trailing group with fewer than three cells is dropped instead of
        raising ``IndexError``.
    """
    rows = [list(CHART_HEADER)]
    # Only walk over complete groups of three cells.
    usable = len(cells) - len(cells) % 3
    for start in range(0, usable, 3):
        rank, artist, title = cells[start:start + 3]
        rows.append([rank, artist.replace(",", "_"), title.replace(",", "_")])
    return rows


def scrape_year(year):
    """Download one year's chart page and save it as ``<OUTPUT_DIR>/<year>.csv``.

    Args:
        year: the chart year; used to build both the URL and the file name.

    Returns:
        True if the CSV file was written, False if the page could not be
        fetched (network error or a non-200 status), in which case nothing
        is written for that year.
    """
    url = "http://billboardtop100of.com/" + str(year) + "-2/"
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("url error: " + url + " (" + str(exc) + ")")
        return False
    if response.status_code != 200:
        print("url error: " + url)
        return False

    soup = BeautifulSoup(response.content, "html.parser")
    cells = [cell.text for cell in soup.find_all("td")]
    chart = rows_from_cells(cells)

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    path = os.path.join(OUTPUT_DIR, str(year) + ".csv")
    # newline="" stops the text layer from rewriting the "\n" terminator
    # (e.g. into "\r\n" on Windows), as the csv module documentation advises.
    with open(path, "w", encoding="utf-8", newline="") as f:
        csv.writer(f, lineterminator="\n").writerows(chart)
    return True


def main():
    """Scrape the charts for 1940 through 2016, one CSV file per year."""
    for year in range(1940, 2017):
        print("start scraping " + str(year) + "'s chart.")
        if not scrape_year(year):
            # Years whose page is unavailable are skipped, without a pause.
            continue
        print("-> " + str(year) + ".csv is finished. sleep for a while. \n")
        sleep(CRAWL_DELAY)
    print("whole process is done. check the directory")


if __name__ == "__main__":
    main()
# End of gist (GitHub page footer omitted).