-
-
Save scrapehero/516fc801a210433602fe9fd41a69b496 to your computer and use it in GitHub Desktop.
from lxml import html | |
import requests | |
from time import sleep | |
import json | |
import argparse | |
from collections import OrderedDict | |
from time import sleep | |
def parse(ticker):
    """Scrape the Yahoo Finance summary for *ticker*.

    Downloads the quote page and the ``quoteSummary`` JSON endpoint, then
    merges the key/value rows of the page's summary table with the 1y target
    estimate, trailing EPS and earnings date read from the JSON payload.

    Args:
        ticker: Symbol to look up, e.g. ``"aapl"``.

    Returns:
        An ``OrderedDict`` of summary fields on success, or
        ``{"error": "Failed to parse json response"}`` when the JSON payload
        cannot be decoded or lacks the expected fields.

    Exceptions raised by ``requests.get`` itself (connection errors, etc.)
    are not caught here and propagate to the caller.
    """
    url = "http://finance.yahoo.com/quote/%s?p=%s" % (ticker, ticker)
    # NOTE(review): verify=False disables TLS certificate checking if the
    # request is redirected to https; kept for backward compatibility, but
    # consider removing it.
    response = requests.get(url, verify=False)
    print("Parsing %s" % (url))
    sleep(4)
    parser = html.fromstring(response.text)
    summary_table = parser.xpath('//div[contains(@data-test,"summary-table")]//tr')
    summary_data = OrderedDict()
    # "&region=US" had been mangled into the "(R)" sign + "ion=US" by HTML
    # entity decoding, which dropped the region parameter from the query.
    other_details_json_link = (
        "https://query2.finance.yahoo.com/v10/finance/quoteSummary/{0}"
        "?formatted=true&lang=en-US&region=US"
        "&modules=summaryProfile%2CfinancialData%2CrecommendationTrend"
        "%2CupgradeDowngradeHistory%2Cearnings%2CdefaultKeyStatistics"
        "%2CcalendarEvents&corsDomain=finance.yahoo.com"
    ).format(ticker)
    summary_json_response = requests.get(other_details_json_link)

    try:
        json_loaded_summary = json.loads(summary_json_response.text)
        result = json_loaded_summary["quoteSummary"]["result"][0]
        y_Target_Est = result["financialData"]["targetMeanPrice"]['raw']
        earnings_list = result["calendarEvents"]['earnings']
        eps = result["defaultKeyStatistics"]["trailingEps"]['raw']
        earnings_date = ' to '.join(
            entry['fmt'] for entry in earnings_list['earningsDate']
        )
    except (ValueError, KeyError, IndexError, TypeError):
        # ValueError covers undecodable JSON; the others cover a payload
        # whose structure differs from what is indexed above (missing key,
        # empty or null "result", non-dict field).
        print("Failed to parse json response")
        return {"error": "Failed to parse json response"}

    # Each summary-table row holds a label cell and a right-aligned value cell.
    for table_data in summary_table:
        raw_table_key = table_data.xpath('.//td[contains(@class,"C(black)")]//text()')
        raw_table_value = table_data.xpath('.//td[contains(@class,"Ta(end)")]//text()')
        table_key = ''.join(raw_table_key).strip()
        table_value = ''.join(raw_table_value).strip()
        summary_data[table_key] = table_value

    summary_data.update({
        '1y Target Est': y_Target_Est,
        'EPS (TTM)': eps,
        'Earnings Date': earnings_date,
        'ticker': ticker,
        'url': url,
    })
    return summary_data
if __name__ == "__main__":
    # Command-line entry point: scrape a single ticker given as the only
    # positional argument and dump the result to "<ticker>-summary.json".
    cli = argparse.ArgumentParser()
    cli.add_argument('ticker', help='')
    ticker = cli.parse_args().ticker

    print("Fetching data for %s" % (ticker))
    scraped_data = parse(ticker)

    print("Writing data to output file")
    output_name = '%s-summary.json' % (ticker)
    with open(output_name, 'w') as fp:
        json.dump(scraped_data, fp, indent=4)
I'm getting the following error:
C:\Python36\Lib\site-packages>python yahoo_finance.py aapl
File "yahoo_finance.py", line 12
print "Parsing %s"%(url)
^
SyntaxError: invalid syntax
Any tips?
This is written for python 2 and you're using python 3.
I'm getting the following error:
C:\Python36\Lib\site-packages>python yahoo_finance.py aapl
File "yahoo_finance.py", line 12
print "Parsing %s"%(url)
^
SyntaxError: invalid syntax
Any tips?
This code is written for Python 2; the dead giveaway is a print statement with no parentheses around what follows print.
You are using Python 3, where print is a function and needs the parentheses. I hope that helps :-)
I am getting this error.
C:\Python36>python yahoo_finance.py APPL Fetching data for APPL Parsing https://finance.yahoo.com/quote/APPL?p=APPL Failed to parse json response Writing data to output file
Really great resource here. Thanks so much for making this. I am working on writing some code that generates fair price per share estimates and this will be integral to my WACC calculation.
ScrapeHero you are my hero!
I am trying to use Beautifulsoup. when I try to get a company details from Yahoo finance https://in.finance.yahoo.com/quote/HON?p=HON
name_box = soup.find("h2", attrs={"class": "Fz(m) Lh(1) Fw(b) Mt(0)"})
When I use the above code, I am getting a "None" value.
Please help me to figure out this.
I modified this to get currentPrice only, works. But for ETFs it does not work because there is no current price in json response.
For example:
Any idea to get this work for ETFs?
Any documentation that shows how to construct the query2.yahoo.finance url would be great, can't seem to find any and would love to scrape financial data.
Any documentation that shows how to construct the query2.yahoo.finance url would be great, can't seem to find any and would love to scrape financial data.
Found this might help others reading this.
https://observablehq.com/@stroked/yahoofinance
TLRY (a cannabis stock) returned 'None' as well. If you're looking for historical data, you can download it for free in a .csv via NASDAQ. Great code though. Thanks!
I tried to execute this code but I get the error below. I don't know how to fix it; please help me.
Current_position_=0;
PROFIT_EXIT_PRICE_PERCENT = 0.2;
LOSS_EXIT_PRICE_PERCENT = -0.1;
def OnMarketPriceChange( current_price, current_time ):
If Current_position_ == 0 AND ( current_price - price_two_hours_ago ) / current_price >; 10%:
SendBuyOrderAtCurrentPrice();
Current_position_ = Current_position_ + 1;
File "", line 2
If Current_position_ == 0 AND ( current_price - price_two_hours_ago ) / current_price >; 10%:
^
SyntaxError: invalid syntax
Here is the updated code to work with latest structure of Yahoo Finance.
https://gist.github.com/scrapehero-code/6d87e1e1369ee701dcea8880b4b620e9
It seems that not every stock has its info stored on the query2 website. For example Nestle (NESN, NESN.VX or nesn.vx). Has anyone else come across this issue? How did you resolve it?