from lxml import html | |
import requests | |
from time import sleep | |
import json | |
import argparse | |
from collections import OrderedDict | |
from time import sleep | |
def parse(ticker, timeout=30):
    """Scrape the Yahoo Finance summary for *ticker*.

    Two HTTP requests are made: one for the HTML quote page (its summary
    table rows become key/value pairs) and one for the ``quoteSummary`` JSON
    endpoint (source of the target estimate, trailing EPS and earnings dates).

    Args:
        ticker: Ticker symbol interpolated into both URLs.
        timeout: Seconds passed to each ``requests.get`` call so that a
            stalled connection cannot hang the script forever.

    Returns:
        An ``OrderedDict`` holding the summary-table rows followed by the
        keys ``'1y Target Est'``, ``'EPS (TTM)'``, ``'Earnings Date'``,
        ``'ticker'`` and ``'url'``. If the JSON response cannot be decoded
        or lacks the expected fields,
        ``{"error": "Failed to parse json response"}`` is returned instead.
    """
    url = "http://finance.yahoo.com/quote/%s?p=%s" % (ticker, ticker)
    # Certificate verification is left at the requests default (enabled);
    # the previous verify=False silently accepted any certificate.
    response = requests.get(url, timeout=timeout)
    print("Parsing %s" % (url))
    # Pause between the two requests.
    sleep(4)
    parser = html.fromstring(response.text)
    summary_table = parser.xpath('//div[contains(@data-test,"summary-table")]//tr')
    summary_data = OrderedDict()

    # NOTE: the query string must contain "&region=US"; an HTML-entity
    # mangling of "&reg" had turned it into the single character "®".
    other_details_json_link = (
        "https://query2.finance.yahoo.com/v10/finance/quoteSummary/{0}"
        "?formatted=true&lang=en-US&region=US"
        "&modules=summaryProfile%2CfinancialData%2CrecommendationTrend"
        "%2CupgradeDowngradeHistory%2Cearnings%2CdefaultKeyStatistics"
        "%2CcalendarEvents&corsDomain=finance.yahoo.com"
    ).format(ticker)
    summary_json_response = requests.get(other_details_json_link, timeout=timeout)

    try:
        json_loaded_summary = json.loads(summary_json_response.text)
        result = json_loaded_summary["quoteSummary"]["result"][0]
        y_Target_Est = result["financialData"]["targetMeanPrice"]['raw']
        earnings_list = result["calendarEvents"]['earnings']
        eps = result["defaultKeyStatistics"]["trailingEps"]['raw']
        earnings_date = ' to '.join(
            entry['fmt'] for entry in earnings_list['earningsDate']
        )
    except (ValueError, KeyError, IndexError, TypeError):
        # ValueError: body is not valid JSON.
        # KeyError / IndexError / TypeError: an expected field is missing,
        # the result list is empty, or a container is None.
        print("Failed to parse json response")
        return {"error": "Failed to parse json response"}

    # Table rows go in first so the JSON-derived values below override any
    # row that happens to share the same key.
    for table_data in summary_table:
        raw_table_key = table_data.xpath('.//td[contains(@class,"C(black)")]//text()')
        raw_table_value = table_data.xpath('.//td[contains(@class,"Ta(end)")]//text()')
        table_key = ''.join(raw_table_key).strip()
        table_value = ''.join(raw_table_value).strip()
        summary_data[table_key] = table_value

    summary_data.update({
        '1y Target Est': y_Target_Est,
        'EPS (TTM)': eps,
        'Earnings Date': earnings_date,
        'ticker': ticker,
        'url': url,
    })
    return summary_data
if __name__ == "__main__":
    # Command-line entry point: scrape one ticker and dump the result
    # (or the error dict returned by parse) to "<ticker>-summary.json".
    argparser = argparse.ArgumentParser()
    # A non-empty help string so that --help actually explains the argument.
    argparser.add_argument('ticker', help='ticker symbol to look up on Yahoo Finance, e.g. AAPL')
    args = argparser.parse_args()
    ticker = args.ticker
    print("Fetching data for %s" % (ticker))
    scraped_data = parse(ticker)
    print("Writing data to output file")
    with open('%s-summary.json' % (ticker), 'w') as fp:
        json.dump(scraped_data, fp, indent=4)
This comment has been minimized.
This comment has been minimized.
I'm getting the following error:
Any tips? |
This comment has been minimized.
This comment has been minimized.
This is written for python 2 and you're using python 3. |
This comment has been minimized.
This comment has been minimized.
C:\Python36\Lib\site-packages>python yahoo_finance.py aapl This code is written for Python 2, dead giveaway is always |
This comment has been minimized.
This comment has been minimized.
I am getting this error. |
This comment has been minimized.
This comment has been minimized.
Really great resource here. Thanks so much for making this. I am working on writing some code that generates fair price per share estimates and this will be integral to my WACC calculation. |
This comment has been minimized.
This comment has been minimized.
I am trying to use BeautifulSoup. When I try to get a company's details from Yahoo Finance https://in.finance.yahoo.com/quote/HON?p=HON |
This comment has been minimized.
This comment has been minimized.
I modified this to get currentPrice only, and it works. But for ETFs it does not work, because there is no current price in the JSON response. For example: Any idea how to get this to work for ETFs? |
This comment has been minimized.
This comment has been minimized.
Any documentation that shows how to construct the query2.yahoo.finance url would be great, can't seem to find any and would love to scrape financial data. |
This comment has been minimized.
This comment has been minimized.
Found this might help others reading this. |
This comment has been minimized.
This comment has been minimized.
TLRY (a cannabis stock) returned 'None' as well. If you're looking for historical data, you can download it for free as a .csv via NASDAQ. Great code though. Thanks! |
This comment has been minimized.
This comment has been minimized.
I tried to execute this code but I get the error below, and I don't know how to fix it. Please help me. Current_position_=0; File "", line 2 |
This comment has been minimized.
This comment has been minimized.
Here is the updated code to work with latest structure of Yahoo Finance. https://gist.github.com/scrapehero-code/6d87e1e1369ee701dcea8880b4b620e9 |
This comment has been minimized.
It seems that not every stock has its info stored on the query2 website. For example, Nestle (NESN, NESN.VX or nesn.vx). Has anyone else come across this issue? How did you resolve it?