Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Yahoo Finance Stock Price Scraper - Mosaic of 40 Brazilian stocks
# -*- coding: utf-8 -*-
from mpl_finance import candlestick_ohlc
import numpy as np
from matplotlib import pyplot
import pandas as pd
from bs4 import BeautifulSoup
import urllib.request
import re
# Apply matplotlib's "ggplot" style sheet to everything drawn below.
pyplot.style.use("ggplot")
# Alternative, larger canvas (disabled; kept for reference):
#pyplot.figure(figsize=(25.60,14.40))
# Create the figure that will hold the mosaic; figsize is (width, height) in inches.
pyplot.figure(figsize=(12.80,7.20))
def getstk(code):
    """Scrape the Yahoo Finance history page of ``code`` into a DataFrame.

    Every ``<td>`` cell of the page is classified as a date, a price or a
    volume, and the resulting flat sequence is cut into 7-value rows
    (Date, Open, High, Low, Close, Adj.Close, Volume).

    Parameters
    ----------
    code : str
        Ticker as used in the Yahoo Finance URL (e.g. ``"petr3.sa"``).

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``Open``, ``High``, ``Low``, ``Close``,
        ``Adj.Close``, ``Volume``, ``Dif``, ``Open.Adj``, ``High.Adj`` and
        ``Low.Adj``, in reverse page order, without zero-volume days and
        with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the classified cells do not form complete 7-value rows.

    NOTE(review): the cell-length heuristics (5 / 9 / >= 10 split parts)
    depend on the exact HTML Yahoo served when this was written -- confirm
    against the current page layout.
    """
    url = "https://finance.yahoo.com/quote/" + code + "/history"
    # The context manager closes the HTTP response once the page is parsed.
    with urllib.request.urlopen(url) as webpage:
        soup = BeautifulSoup(webpage, "lxml")
    cells = [str(td) for td in soup.findAll("td")]

    # Compiled once; splits a cell's HTML on '>', '+' and '<'.
    splitter = re.compile("[>+<]")

    def to_number(text):
        """Return ``text`` as int (volume) or float (price), else ``None``."""
        digits = text.replace(",", "")  # drop thousands separators
        try:
            return int(digits)
        except ValueError:
            pass
        try:
            return float(digits)
        except ValueError:
            return None

    tf = []
    # Each cell is inspected together with its successor (needed to detect
    # dividend rows), so the very last cell is never classified.
    for cell, following in zip(cells, cells[1:]):
        parts = splitter.split(cell)
        if len(parts) == 5:
            # Bare cell without an inner tag: a day with zero volume.
            if parts[4] == "":
                tf.append(0)
        elif len(parts) == 9:
            # Regular cell: the text sits at position 4.
            value = to_number(parts[4])
            if value is not None:
                # Exactly one entry per numeric cell keeps the row stride intact.
                tf.append(value)
            elif len(splitter.split(following)) < 10:
                # A date; dates followed by a dividend cell (17 parts) are skipped.
                tf.append(parts[4])

    columns = ("Date", "Open", "High", "Low", "Close", "Adj.Close", "Volume")
    if len(tf) % len(columns):
        raise ValueError(
            "Could not parse the price table for %r: %d values do not form "
            "complete rows of %d columns" % (code, len(tf), len(columns))
        )

    # Take every 7th value per column and reverse the page order.
    stockFile = pd.DataFrame(
        {name: tf[offset::len(columns)][::-1] for offset, name in enumerate(columns)}
    )

    # Adjusting the prices for dividends
    stockFile["Dif"] = stockFile["Adj.Close"] - stockFile["Close"]
    stockFile["Open.Adj"] = stockFile["Open"] + stockFile["Dif"]
    stockFile["High.Adj"] = stockFile["High"] + stockFile["Dif"]
    stockFile["Low.Adj"] = stockFile["Low"] + stockFile["Dif"]

    stockFile = stockFile.query('Volume !=0')  # excluding days without movement (volume = 0)
    stockFile = stockFile.dropna()
    stockFile.index = range(len(stockFile))  # renumber the index after rows were dropped
    return stockFile
def grafico(stk, eixo):
    """Draw a small candlestick chart of one Bovespa stock on ``eixo``.

    Downloads the price history through ``getstk``, prints a short summary
    (stock, last date, last close, 20-day moving average) and plots the most
    recent candles, a horizontal line at the latest moving-average value and
    a label with the last adjusted close.

    Parameters
    ----------
    stk : str
        Ticker without the country suffix (``'.sa'`` is appended here).
    eixo : matplotlib axes
        Axes that receive all drawing.

    Returns
    -------
    The ``Date`` value of the most recent row.

    Raises
    ------
    ValueError
        If no price rows could be retrieved for the stock.
    """
    stk1 = stk + '.sa'  # .sa = local code (Bovespa)
    stock = getstk(stk1)
    if stock.empty:
        raise ValueError("No price data available for " + stk1)

    dia = stock["Date"].iloc[-1]
    print()
    print("Stock : ", stk)
    print("Last date : ", dia)
    print("Last price : ", stock["Close"].iloc[-1])

    # Moving average calculation (20-day period); NaN with fewer than 20 rows.
    media = float(stock["Close"].rolling(window=20).mean().iloc[-1])
    print("Moving average : ", round(media, 2))

    prd = 15  # days window for each stock
    eixoX = np.linspace(1, prd + 1, num=prd + 1)
    eixoY = [media] * (prd + 1)

    # Clamp the start so a short history cannot produce a negative position,
    # which would make .iloc wrap around to the wrong rows.
    inicio = max(len(stock) - prd - 1, 0)
    janela = stock.iloc[inicio:]
    # Candlestick list: (x position, open, high, low, close), x starting at 1.
    OHLC = [
        (dt, abertura, maxima, minima, fechamento)
        for dt, (abertura, maxima, minima, fechamento) in enumerate(
            zip(janela['Open.Adj'], janela['High.Adj'],
                janela['Low.Adj'], janela['Adj.Close']),
            start=1,
        )
    ]

    bboxP = dict(boxstyle="round,pad=0.1", fc="white", ec="black", lw=0.5, alpha=0.35)
    candlestick_ohlc(eixo, OHLC, width=0.7, colorup='green', colordown='red')
    # Draw on the axes that were passed in rather than on pyplot's implicit
    # "current axes", so the chart lands in the right cell in every case.
    eixo.plot(eixoX, eixoY, color="darkorange", alpha=0.5)  # last day moving average level
    ultimo = OHLC[-1]
    eixo.text(ultimo[0], ultimo[4], str(round(ultimo[4], 2)),
              ha="center", va="center", size=6.0, bbox=bboxP)  # last price label
    eixo.set_title(stk, fontsize=7.6)
    eixo.set_xticklabels([])
    eixo.set_yticklabels([])
    return dia
# The 40 tickers shown in the mosaic, filled row by row from the top left.
lista = [
    'abev3', 'bbas3', 'bbdc4', 'bbse3', 'brap4', 'brfs3', 'brkm5', 'btow3', 'ccro3', 'ciel3',
    'cmig4', 'cple6', 'csna3', 'dtex3', 'elet6', 'embr3', 'eztc3', 'ggbr4', 'grnd3', 'itub4',
    'irbr3', 'klbn4', 'lame4', 'mdia3', 'mglu3', 'natu3', 'pcar4', 'petr3', 'pssa3', 'sbsp3',
    'suzb3', 'timp3', 'trpl4', 'radl3', 'rapt4', 'tots3', 'usim5', 'vale3', 'vivt4', 'wege3',
]
grid = (4, 10)  # mosaic layout: 4 rows by 10 columns
data = []       # last date returned by grafico for each stock
n_rows, n_cols = grid

# One chart per grid cell; the cell position is derived from the running index.
for ct in range(n_rows * n_cols):
    row, col = divmod(ct, n_cols)
    eixo = pyplot.subplot2grid(grid, (row, col), colspan=1, rowspan=1)
    dia = grafico(lista[ct], eixo)
    data.append(dia)
    pyplot.draw()

# The figure title is the date returned for the last stock plotted.
pyplot.suptitle(dia, fontsize=8)
# Minor chart window adjustments.
pyplot.subplots_adjust(left=0.03, right=0.97, top=0.93, bottom=0.03,
                       hspace=0.20, wspace=0.14)
pyplot.show()
@Especuloide

This comment has been minimized.

Copy link
Owner Author

commented May 29, 2018

After pandas.datareader deprecated Yahoo Finance, I wrote a web scraper using Beautiful Soup to get stock prices from there, but it has limitations: it is only able to get about 100 days of data, minus each day on which a dividend (if any) is paid for a particular stock.

Bear in mind that although Yahoo Finance adjusts prices for dividends, it does not adjust them for splits (at least for Brazilian stocks).

It can easily be adjusted for stocks of other countries covered by Yahoo Finance by simply altering the local stock country code (the '.sa' suffix, line 93).

code

@Especuloide

This comment has been minimized.

Copy link
Owner Author

commented Jul 2, 2019

I had some trouble scraping "LREN3" (it works on some days and not on others), so I replaced it with "IRBR3".

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.