Skip to content

Instantly share code, notes, and snippets.

@dimitryzub
Last active October 24, 2023 15:14
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dimitryzub/a5e30389e13142b9262f52154cd56092 to your computer and use it in GitHub Desktop.
Save dimitryzub/a5e30389e13142b9262f52154cd56092 to your computer and use it in GitHub Desktop.
A script that scrapes Google Finance Ticker in Python - google.com/finance/quote/
import nasdaqdatalink
import requests, json, re
from parsel import Selector
from itertools import zip_longest
def scrape_google_finance(ticker: str):
params = {
"hl": "en" # language
}
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
}
html = requests.get(f"https://www.google.com/finance/quote/{ticker}", params=params, headers=headers, timeout=30)
selector = Selector(text=html.text)
# where all extracted data will be temporary located
ticker_data = {
"ticker_data": {},
"about_panel": {},
"news": {"items": []},
"finance_perfomance": {"table": []},
"people_also_search_for": {"items": []},
"interested_in": {"items": []}
}
# current price, quote, title extraction
ticker_data["ticker_data"]["current_price"] = selector.css(".AHmHk .fxKbKc::text").get()
ticker_data["ticker_data"]["quote"] = selector.css(".PdOqHc::text").get().replace(" • ",":")
ticker_data["ticker_data"]["title"] = selector.css(".zzDege::text").get()
# about panel extraction
about_panel_keys = selector.css(".gyFHrc .mfs7Fc::text").getall()
about_panel_values = selector.css(".gyFHrc .P6K39c").xpath("normalize-space()").getall()
for key, value in zip_longest(about_panel_keys, about_panel_values):
key_value = key.lower().replace(" ", "_")
ticker_data["about_panel"][key_value] = value
# description "about" extraction
ticker_data["about_panel"]["description"] = selector.css(".bLLb2d::text").get()
ticker_data["about_panel"]["extensions"] = selector.css(".w2tnNd::text").getall()
# news extarction
if selector.css(".yY3Lee").get():
for index, news in enumerate(selector.css(".yY3Lee"), start=1):
ticker_data["news"]["items"].append({
"position": index,
"title": news.css(".Yfwt5::text").get(),
"link": news.css(".z4rs2b a::attr(href)").get(),
"source": news.css(".sfyJob::text").get(),
"published": news.css(".Adak::text").get(),
"thumbnail": news.css("img.Z4idke::attr(src)").get()
})
else:
ticker_data["news"]["error"] = f"No news result from a {ticker}."
# finance perfomance table
if selector.css(".slpEwd .roXhBd").get():
fin_perf_col_2 = selector.css(".PFjsMe+ .yNnsfe::text").get() # e.g. Dec 2021
fin_perf_col_3 = selector.css(".PFjsMe~ .yNnsfe+ .yNnsfe::text").get() # e.g. Year/year change
for fin_perf in selector.css(".slpEwd .roXhBd"):
if fin_perf.css(".J9Jhg::text , .jU4VAc::text").get():
perf_key = fin_perf.css(".J9Jhg::text , .jU4VAc::text").get() # e.g. Revenue, Net Income, Operating Income..
perf_value_col_1 = fin_perf.css(".QXDnM::text").get() # 60.3B, 26.40%..
perf_value_col_2 = fin_perf.css(".gEUVJe .JwB6zf::text").get() # 2.39%, -21.22%..
ticker_data["finance_perfomance"]["table"].append({
perf_key: {
fin_perf_col_2: perf_value_col_1,
fin_perf_col_3: perf_value_col_2
}
})
else:
ticker_data["finance_perfomance"]["error"] = f"No 'finence perfomance table' for {ticker}."
# "you may be interested in" results
if selector.css(".HDXgAf .tOzDHb").get():
for index, other_interests in enumerate(selector.css(".HDXgAf .tOzDHb"), start=1):
ticker_data["interested_in"]["items"].append(discover_more_tickers(index, other_interests))
else:
ticker_data["interested_in"]["error"] = f"No 'you may be interested in` results for {ticker}"
# "people also search for" results
if selector.css(".HDXgAf+ div .tOzDHb").get():
for index, other_tickers in enumerate(selector.css(".HDXgAf+ div .tOzDHb"), start=1):
ticker_data["people_also_search_for"]["items"].append(discover_more_tickers(index, other_tickers))
else:
ticker_data["people_also_search_for"]["error"] = f"No 'people_also_search_for` in results for {ticker}"
return ticker_data
def discover_more_tickers(index: int, other_data: str):
"""
if price_change_formatted will start complaining,
check beforehand for None values with try/except and set it to 0, in this function.
however, re.search(r"\d{1}%|\d{1,10}\.\d{1,2}%" should make the job done.
"""
return {
"position": index,
"ticker": other_data.css(".COaKTb::text").get(),
"ticker_link": f'https://www.google.com/finance{other_data.attrib["href"].replace("./", "/")}',
"title": other_data.css(".RwFyvf::text").get(),
"price": other_data.css(".YMlKec::text").get(),
"price_change": other_data.css("[jsname=Fe7oBc]::attr(aria-label)").get(),
# https://regex101.com/r/BOFBlt/1
# Up by 100.99% -> 100.99%
"price_change_formatted": re.search(r"\d{1}%|\d{1,10}\.\d{1,2}%", other_data.css("[jsname=Fe7oBc]::attr(aria-label)").get()).group()
}
scrape_google_finance(ticker="GOOGL:NASDAQ")
# Google Finance uses Nasdaq or other time-series data providers to built the chart.
# You can find data provider somewhere near their Disclaimer: https://www.google.com/intl/en_UA/googlefinance/disclaimer/
def nasdaq_get_timeseries_data():
nasdaqdatalink.read_key(filename=".nasdaq_api_key")
# print(nasdaqdatalink.ApiConfig.api_key) # prints api key from the .nasdaq_api_key file
timeseries_data = nasdaqdatalink.get("WIKI/GOOGL", collapse="monthly") # not sure what "WIKI" stands for
print(timeseries_data)
# nasdaq_get_timeseries_data()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment