# imdkm/scrapingbillboardtop100of.py

## scrapingbillboardtop100of.py
import requests, csv
from bs4 import BeautifulSoup
from time import sleep

# Scrape one year-end chart page per year from billboardtop100of.com and save
# each as ./csv/<year>.csv with the columns: rank, artist name, song title.
for year in range(1940, 2017):
    print("start scraping " + str(year) + "'s chart.")

    # every csv file starts with a header row.
    chart = [["rank", "artist name", "song title"]]

    # make a url of the year and get its html data.
    # the timeout keeps a stalled connection from hanging the whole run, and
    # network failures are reported like any other bad url instead of
    # aborting the remaining years.
    url = "http://billboardtop100of.com/" + str(year) + "-2/"
    try:
        r = requests.get(url, timeout=30)
    except requests.exceptions.RequestException:
        print("url error: " + url)
        continue

    # check if the url is valid. if not, skip the whole step below.
    if r.status_code != 200:
        print("url error: " + url)
        continue

    # collect every table cell (<td>) on the page.
    soup = BeautifulSoup(r.content, "html.parser")
    items = soup.find_all("td")

    # group the cells three at a time into one chart row each; the page is
    # presumably laid out as rank / artist / title, matching the header.
    # zip() stops at the shortest slice, so a trailing incomplete group is
    # dropped instead of raising IndexError.
    # commas in artist and title are replaced with underscores, which keeps
    # the fields free of the separator character.
    for rank, artist, title in zip(items[0::3], items[1::3], items[2::3]):
        chart.append([rank.text,
                      artist.text.replace(",", "_"),
                      title.text.replace(",", "_")])

    # save list in csv format.
    # newline='' hands line-ending control to the csv writer, so the
    # lineterminator below is written unchanged on every platform.
    # NOTE: the ./csv/ directory must already exist; open() does not create it.
    with open('./csv/' + str(year) + '.csv', 'w',
              encoding='utf-8', newline='') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(chart)

    print("-> " + str(year) + ".csv is finished. sleep for a while. \n")

    # pause between successful downloads to avoid hammering the site.
    sleep(30)

print("whole process is done. check the directory")
	import requests, csv
	from bs4 import BeautifulSoup
	from time import sleep

	# Scrape one year-end chart page per year from billboardtop100of.com and
	# save each as ./csv/<year>.csv with the columns: rank, artist name, song title.
	for year in range(1940, 2017):
		print("start scraping " + str(year) + "'s chart.")

		# every csv file starts with a header row.
		chart = [["rank", "artist name", "song title"]]

		# make a url of the year and get its html data.
		# the timeout keeps a stalled connection from hanging the whole run,
		# and network failures are reported like any other bad url instead of
		# aborting the remaining years.
		url = "http://billboardtop100of.com/" + str(year) + "-2/"
		try:
			r = requests.get(url, timeout=30)
		except requests.exceptions.RequestException:
			print("url error: " + url)
			continue

		# check if the url is valid. if not, skip the whole step below.
		if r.status_code != 200:
			print("url error: " + url)
			continue

		# collect every table cell (<td>) on the page.
		soup = BeautifulSoup(r.content, "html.parser")
		items = soup.find_all("td")

		# group the cells three at a time into one chart row each; the page is
		# presumably laid out as rank / artist / title, matching the header.
		# zip() stops at the shortest slice, so a trailing incomplete group is
		# dropped instead of raising IndexError.
		# commas in artist and title are replaced with underscores, which keeps
		# the fields free of the separator character.
		for rank, artist, title in zip(items[0::3], items[1::3], items[2::3]):
			chart.append([
				rank.text,
				artist.text.replace(",", "_"),
				title.text.replace(",", "_"),
			])

		# save list in csv format.
		# newline='' hands line-ending control to the csv writer, so the
		# lineterminator below is written unchanged on every platform.
		# NOTE: the ./csv/ directory must already exist; open() does not create it.
		with open('./csv/' + str(year) + '.csv', 'w', encoding='utf-8', newline='') as f:
			writer = csv.writer(f, lineterminator='\n')
			writer.writerows(chart)

		print("-> " + str(year) + ".csv is finished. sleep for a while. \n")

		# pause between successful downloads to avoid hammering the site.
		sleep(30)

	print("whole process is done. check the directory")