-
-
Save scrapehero/516fc801a210433602fe9fd41a69b496 to your computer and use it in GitHub Desktop.
from lxml import html | |
import requests | |
from time import sleep | |
import json | |
import argparse | |
from collections import OrderedDict | |
from time import sleep | |
def parse(ticker):
    """Scrape the Yahoo Finance summary for *ticker*.

    Two requests are made: the HTML quote page (for the rows of the
    summary table) and the ``quoteSummary`` JSON endpoint (for the
    1-year target estimate, trailing EPS and earnings dates).

    Args:
        ticker: Symbol to look up; it is interpolated into both URLs as-is.

    Returns:
        An ``OrderedDict`` holding one entry per summary-table row,
        followed by the keys ``'1y Target Est'``, ``'EPS (TTM)'``,
        ``'Earnings Date'``, ``'ticker'`` and ``'url'``.  If the JSON
        response cannot be decoded or lacks an expected field,
        ``{"error": "Failed to parse json response"}`` is returned instead.

    Raises:
        requests.RequestException: network failures (including timeouts)
            are not caught here and propagate to the caller.
    """
    # Without a timeout a stalled connection would block forever.
    request_timeout = 30

    url = "http://finance.yahoo.com/quote/%s?p=%s" % (ticker, ticker)
    # Certificate verification is left at the requests default (enabled).
    response = requests.get(url, timeout=request_timeout)
    print("Parsing %s" % (url))
    sleep(4)  # pause between the two requests
    parser = html.fromstring(response.text)
    summary_table = parser.xpath('//div[contains(@data-test,"summary-table")]//tr')

    # The query string must contain "&region=US"; an HTML-entity mangled
    # "®ion=US" silently corrupts the "lang" parameter and drops "region".
    other_details_json_link = (
        "https://query2.finance.yahoo.com/v10/finance/quoteSummary/{0}"
        "?formatted=true&lang=en-US&region=US"
        "&modules=summaryProfile%2CfinancialData%2CrecommendationTrend"
        "%2CupgradeDowngradeHistory%2Cearnings%2CdefaultKeyStatistics"
        "%2CcalendarEvents"
        "&corsDomain=finance.yahoo.com"
    ).format(ticker)
    summary_json_response = requests.get(
        other_details_json_link, timeout=request_timeout
    )

    # Only the JSON decoding and field lookups are guarded: a missing
    # module/field (KeyError), an empty or null "result" (IndexError /
    # TypeError) or an undecodable body (ValueError) all mean the response
    # did not have the expected shape.
    try:
        json_loaded_summary = json.loads(summary_json_response.text)
        result = json_loaded_summary["quoteSummary"]["result"][0]
        y_Target_Est = result["financialData"]["targetMeanPrice"]['raw']
        earnings_list = result["calendarEvents"]['earnings']
        eps = result["defaultKeyStatistics"]["trailingEps"]['raw']
        earnings_date = ' to '.join(
            entry['fmt'] for entry in earnings_list['earningsDate']
        )
    except (ValueError, KeyError, IndexError, TypeError):
        print("Failed to parse json response")
        return {"error": "Failed to parse json response"}

    summary_data = OrderedDict()
    for table_data in summary_table:
        raw_table_key = table_data.xpath('.//td[contains(@class,"C(black)")]//text()')
        raw_table_value = table_data.xpath('.//td[contains(@class,"Ta(end)")]//text()')
        table_key = ''.join(raw_table_key).strip()
        table_value = ''.join(raw_table_value).strip()
        summary_data[table_key] = table_value

    summary_data.update({
        '1y Target Est': y_Target_Est,
        'EPS (TTM)': eps,
        'Earnings Date': earnings_date,
        'ticker': ticker,
        'url': url,
    })
    return summary_data
if __name__ == "__main__":
    # Command-line entry point: take one positional ticker symbol, scrape
    # its summary with parse(), and dump the result as indented JSON into
    # "<ticker>-summary.json" in the current working directory.
    cli = argparse.ArgumentParser()
    cli.add_argument('ticker', help='')
    symbol = cli.parse_args().ticker

    print("Fetching data for %s" % (symbol))
    result = parse(symbol)

    print("Writing data to output file")
    output_name = '%s-summary.json' % (symbol)
    with open(output_name, 'w') as out_file:
        json.dump(result, out_file, indent=4)
I modified this to get only currentPrice, and it works. But it does not work for ETFs, because there is no current price in the JSON response.
For example:
Any idea to get this work for ETFs?
Any documentation that shows how to construct the query2.yahoo.finance url would be great, can't seem to find any and would love to scrape financial data.
Any documentation that shows how to construct the query2.yahoo.finance url would be great, can't seem to find any and would love to scrape financial data.
I found this; it might help others reading this thread.
https://observablehq.com/@stroked/yahoofinance
TLRY (a cannabis stock) returned 'None' as well. If you're looking for historical data, you can download it for free as a .csv from NASDAQ. Great code though. Thanks!
I tried to execute this code but I get the error below. I don't know how to fix it; please help me.
# NOTE(review): this snippet is not valid Python as posted; the code lines
# are left exactly as written and the comments only explain the problems.
# The trailing semicolons are legal in Python but unnecessary.
Current_position_=0;
# The two constants below are defined but never used within this snippet.
PROFIT_EXIT_PRICE_PERCENT = 0.2;
LOSS_EXIT_PRICE_PERCENT = -0.1;
# Handler taking the current price and time; `current_time` is not used in
# the visible body. The body lines are not indented here (indentation was
# presumably lost when the snippet was pasted), which Python also rejects.
def OnMarketPriceChange( current_price, current_time ):
# The line below is the one the SyntaxError points at:
#   - Python keywords are lowercase: `if` and `and`, not `If` and `AND`;
#   - `>;` is not an operator (possibly a mangled `>`);
#   - `10%` is not a numeric literal; a fraction such as 0.10 would be needed.
# `price_two_hours_ago` is not defined anywhere in this snippet.
If Current_position_ == 0 AND ( current_price - price_two_hours_ago ) / current_price >; 10%:
# `SendBuyOrderAtCurrentPrice` is likewise not defined in this snippet.
SendBuyOrderAtCurrentPrice();
# Assigning to the module-level `Current_position_` from inside a function
# requires a `global Current_position_` declaration in the function body.
Current_position_ = Current_position_ + 1;
File "", line 2
If Current_position_ == 0 AND ( current_price - price_two_hours_ago ) / current_price >; 10%:
^
SyntaxError: invalid syntax
Here is the updated code, which works with the latest structure of Yahoo Finance.
https://gist.github.com/scrapehero-code/6d87e1e1369ee701dcea8880b4b620e9
I am trying to use BeautifulSoup to get a company's details from Yahoo Finance (https://in.finance.yahoo.com/quote/HON?p=HON):
name_box = soup.find("h2", attrs={"class": "Fz(m) Lh(1) Fw(b) Mt(0)"})
When I use the above code, I get a "None" value.
Please help me figure this out.