Skip to content

Instantly share code, notes, and snippets.

@Zabanaa
Created December 3, 2015 22:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Zabanaa/599168f0a0fc274f675e to your computer and use it in GitHub Desktop.
First stab at creating a web crawler using Requests and Beautiful Soup
# This script will attempt to fetch data from Yahoo finance for a particular
# stock, get the prices for the last 6 months and save the info to a csv file
import requests
from bs4 import BeautifulSoup
import csv
import time
def get_prices(url):
    """Fetch the stock-price table from a Yahoo Finance history page.

    Parameters
    ----------
    url : str
        URL of the Yahoo Finance historical-prices page to scrape.

    Returns
    -------
    list[list[str]]
        One ``[date, adjusted_close]`` pair per data row of the table.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status code.
    """
    print("Fetching Prices ...")
    time.sleep(1)  # courtesy delay between successive page requests

    # Fix: add a timeout so a stalled server cannot hang the crawler
    # forever, and fail loudly on HTTP errors instead of silently
    # trying to parse an error page.
    initial_request = requests.get(url, timeout=30)
    initial_request.raise_for_status()
    soup = BeautifulSoup(initial_request.text, "html.parser")

    # The page has several generic tables matching cellpadding="2"; the
    # CSS selectors on the site are weak, so we rely on table position:
    # index 4 is the one containing the price rows.
    tables = soup.find_all("table", {"cellpadding": "2"})
    stock_table = tables[4]

    # Skip the first row (headings) and the last row (no price data).
    rows = stock_table.find_all("tr")[1:-1]

    prices = []
    for row in rows:
        cells = row.find_all("td")
        date = cells[0].string        # first cell: trade date
        adj_price = cells[-1].string  # last cell: adjusted close price
        prices.append([date, adj_price])

    print("Prices stored, preparing the info ...")
    return prices
def copy_to_csv(price_list, file_name):
    """Append rows of price data to ``<file_name>.csv``.

    Parameters
    ----------
    price_list : list[list[str]]
        Rows (e.g. ``[date, adjusted_close]``) to append to the file.
    file_name : str
        Path of the target file, without the ``.csv`` extension.
    """
    print("Writing prices to csv file ...")
    # Fix: use a context manager so the file is closed even if a write
    # fails, and open with newline="" as the csv module requires (this
    # prevents blank lines between rows on Windows). The cosmetic
    # 1-second sleep served no purpose and was dropped.
    with open(file_name + ".csv", "a", newline="") as prices_file:
        csv.writer(prices_file).writerows(price_list)
    print("Prices saved to the csv file, open it !")
if __name__ == "__main__":
    # The price table on every page begins with a row of headings. If
    # get_prices wrote them, the headings would repeat once per fetched
    # page in the output CSV, so the header row is written exactly once
    # here, before crawling, and copy_to_csv only ever appends data rows.
    #
    # Fixes vs the original: newline="" for the csv module, no redundant
    # close() inside the `with` block (the context manager already closes
    # the file), and the fetched rows no longer reuse the file handle's
    # variable name.
    with open("apple-prices.csv", "w", newline="") as header_file:
        csv.writer(header_file).writerow(["Date", "Adj Close"])

    # Yahoo paginates the history 66 rows at a time; `y` is the row
    # offset, so we advance by 66 until the 198-row window is covered.
    page = 0
    while page < 198:
        apple_stock_url = "https://uk.finance.yahoo.com/q/hp?s=AAPL&a=03&b=6&c=2015&d=11&e=2&f=2015&g=d&z=66&y=" + str(page)
        print(apple_stock_url)
        page_prices = get_prices(apple_stock_url)
        copy_to_csv(page_prices, "apple-prices")
        page += 66
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment