Created
June 8, 2022 13:43
-
-
Save Kazanskyi/aa82adebf891c858b6fd537b52a05f0d to your computer and use it in GitHub Desktop.
Zacks.com future earnings and dividends data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
from datetime import date | |
import pandas as pd | |
import numpy as np | |
from selenium import webdriver | |
from webdriver_manager.chrome import ChromeDriverManager | |
from selenium.webdriver.common.keys import Keys | |
from selenium.webdriver.support.select import Select | |
import io | |
#symbol = "AXP" | |
def search_symbol(symbol, driver):
    """Type ``symbol`` into the page's ticker search box and submit it.

    Args:
        symbol: Text typed into the element whose id is ``ticker``.
        driver: Selenium WebDriver with a page containing that element loaded.

    Note:
        ``implicitly_wait`` does not pause execution here; it sets the
        timeout (3 seconds) that later element lookups on ``driver`` will
        poll for before failing.
    """
    # find_element(by, value) replaces find_element_by_id, which was removed
    # in Selenium 4.3. The string "id" is the value of selenium's By.ID.
    search_box = driver.find_element("id", "ticker")
    search_box.send_keys(symbol)
    search_box.send_keys(Keys.RETURN)
    driver.implicitly_wait(3)
def find_estim_tables(driver):
    """Scrape the earnings tables of the currently loaded Zacks page.

    Reads every ``tr`` of the element with id
    ``earnings_announcements_earnings_table`` (the history) and the
    ``td``/``th`` cells under ``//*[@id="right_content"]/section[2]/div``
    (the latest estimate), then stacks the latest estimate on top of the
    history.

    Args:
        driver: Selenium WebDriver showing the earnings page.

    Returns:
        A DataFrame indexed by ``date`` (datetime) with the columns
        ``Period_Ending``, ``Estimate``, ``Surprise_%``, ``Reported``,
        ``Surprise``, ``str1`` and ``str2``. Cell text is kept as scraped
        unless ``read_csv`` infers a numeric type. The first row is the
        latest estimate; its history-only columns are NaN.
    """
    # Locator strategies are passed as plain strings ("xpath", "tag name"),
    # which are the values of selenium's By.XPATH / By.TAG_NAME. The
    # find_element_by_* helpers used previously were removed in Selenium 4.3.
    history_table = driver.find_element(
        "xpath", '//*[@id="earnings_announcements_earnings_table"]'
    )
    # One line of text per table row; newlines inside a row are flattened so
    # each row stays on a single line for the parser.
    history_rows = [
        row.text.replace("\n", " ")
        for row in history_table.find_elements("tag name", "tr")
    ]
    # header=0 discards the scraped header row in favour of `names`.
    # str1/str2 absorb the two trailing whitespace-separated tokens of each
    # row (presumably the report-time words, e.g. "After Close" - confirm).
    # sep=r"\s+" is the documented replacement for delim_whitespace=True.
    earnings_history_df = pd.read_csv(
        io.StringIO("\n".join(history_rows)),
        sep=r"\s+",
        header=0,
        names=[
            "date",
            "Period_Ending",
            "Estimate",
            "Reported",
            "Surprise",
            "Surprise_%",
            "str1",
            "str2",
        ],
    )

    # Latest estimate block.
    latest_block = driver.find_element(
        "xpath", '//*[@id="right_content"]/section[2]/div'
    )
    latest_cells = [
        cell.text.replace("\n", " ")
        for cell in latest_block.find_elements("tag name", "td")
    ]
    latest_headers = [
        cell.text.replace("\n", " ")
        for cell in latest_block.find_elements("tag name", "th")
    ]
    # The td cells become a single row; exactly three cells are expected,
    # otherwise the column assignment below fails on a length mismatch.
    earnings_latest_df = pd.DataFrame(latest_cells).transpose()
    earnings_latest_df.columns = ["Period_Ending", "Estimate", "Surprise_%"]
    # The date is the first space-separated token of the last header cell.
    earnings_latest_df["date"] = latest_headers[-1].split(" ")[0]

    earnings = pd.concat(
        [earnings_latest_df, earnings_history_df], ignore_index=True, sort=False
    )
    earnings["date"] = pd.to_datetime(earnings["date"])
    return earnings.set_index("date")
def find_divid_tables(driver):
    """Open the dividends tab of the loaded Zacks page and scrape its table.

    Scrolls the tab strip into view, clicks the element with id ``ui-id-7``
    (presumably the dividends tab - confirm against the page), switches the
    table length dropdown to "100" and parses the rows of the element with id
    ``earnings_announcements_dividends_table``.

    Args:
        driver: Selenium WebDriver showing the earnings-calendar page.

    Returns:
        A DataFrame indexed by ``date`` (the announcement date, datetime)
        with the columns ``Date_Paid`` and ``Ex-Dividend_Date`` (datetime)
        and ``Amount`` (as parsed by ``read_csv``). Rows containing any
        missing value are dropped.
    """
    # Locator strategies are passed as plain strings ("xpath", "name",
    # "tag name"), which are the values of selenium's By constants. The
    # find_element_by_* helpers used previously were removed in Selenium 4.3.
    tab_strip = driver.find_element(
        "xpath", '//*[@id="earnings_announcements_tabs"]/ul'
    )
    # Center the tab strip in the viewport before clicking a tab.
    driver.execute_script(
        'arguments[0].scrollIntoView({block: "center", inline: "center"})',
        tab_strip,
    )
    driver.find_element("xpath", '//*[@id="ui-id-7"]').click()
    # Fixed pause after the click, before touching the tab's controls.
    time.sleep(3)

    # Show up to 100 records.
    length_dropdown = driver.find_element(
        "name", "earnings_announcements_dividends_table_length"
    )
    Select(length_dropdown).select_by_visible_text("100")

    table = driver.find_element(
        "xpath", '//*[@id="earnings_announcements_dividends_table"]'
    )
    # One line of text per table row; newlines inside a row are flattened.
    rows = [
        row.text.replace("\n", " ")
        for row in table.find_elements("tag name", "tr")
    ]
    # header=0 discards the scraped header row in favour of `names`.
    # sep=r"\s+" is the documented replacement for delim_whitespace=True.
    dividends_history_df = pd.read_csv(
        io.StringIO("\n".join(rows)),
        sep=r"\s+",
        header=0,
        names=["Date_Paid", "Amount", "Date_Announced", "Ex-Dividend_Date"],
    )
    dividends_history_df.dropna(axis=0, inplace=True)
    for column in ("Date_Paid", "Date_Announced", "Ex-Dividend_Date"):
        dividends_history_df[column] = pd.to_datetime(dividends_history_df[column])

    # The announcement date becomes the index, under the name "date".
    return dividends_history_df.rename(
        columns={"Date_Announced": "date"}
    ).set_index("date")
def _earnings_features(earnings):
    """Turn the scraped earnings frame into numeric surprise/growth features."""
    earnings = earnings.replace({"--": np.nan})
    # regex=False: "$" must be stripped literally, independent of the pandas
    # version's default for `regex`.
    earnings["Reported"] = pd.to_numeric(
        earnings["Reported"].str.replace("$", "", regex=False)
    )
    earnings["Estimate"] = pd.to_numeric(
        earnings["Estimate"].str.replace("$", "", regex=False)
    )
    earnings["Surprise_%"] = pd.to_numeric(
        earnings["Surprise_%"]
        .str.replace("%", "", regex=False)
        .str.replace(",", "", regex=False)
    )
    earnings["surprise_%"] = earnings["Surprise_%"] / 100
    earnings["date_of_report"] = earnings.index
    # shift(1) pulls the Estimate of the row above; shift(-1) pulls the
    # surprise of the row below.
    earnings["future_estimate"] = earnings["Estimate"].shift(1)
    earnings["previous_surprise"] = earnings["surprise_%"].shift(-1)
    earnings["expected_growth"] = (
        earnings["future_estimate"] - earnings["Reported"]
    ) / earnings["Reported"]
    return earnings[
        ["surprise_%", "expected_growth", "previous_surprise", "date_of_report"]
    ]


def _dividend_features(dividends):
    """Turn the scraped dividends frame into dividend-change features."""
    dividends = dividends.replace({"--": np.nan})
    dividends["Amount"] = pd.to_numeric(
        dividends["Amount"].str.replace("$", "", regex=False)
    )
    # Kept as a column so days since the announcement can be counted later.
    dividends["date_announced"] = dividends.index
    dividends["previous_divid"] = dividends["Amount"].shift(-1)
    dividends["dividends_change"] = (
        dividends["Amount"] - dividends["previous_divid"]
    ) / dividends["previous_divid"]
    # Keep only rows where the amount changed (NaN changes are kept too);
    # copy so the column added next is not written to a slice.
    dividends = dividends[dividends["dividends_change"] != 0].copy()
    dividends["prev_div_change"] = dividends["dividends_change"].shift(-1)
    return dividends[["dividends_change", "prev_div_change", "date_announced"]]


def _expand_to_daily(frame, end, anchor_col, days_col):
    """Forward-fill ``frame`` onto a daily calendar and count days since ``anchor_col``."""
    calendar = pd.DataFrame()
    calendar["date"] = pd.date_range(start=frame.index.min(), end=end)
    calendar = calendar.set_index("date")

    daily = calendar.join(frame, how="left")
    # Fill forward in chronological order, then present newest day first.
    daily = daily.sort_values(by="date", axis=0, ascending=True).ffill(axis=0)
    daily = daily.sort_values(by="date", axis=0, ascending=False)
    elapsed = daily.index - daily[anchor_col]
    daily[days_col] = pd.to_numeric(elapsed.dt.days, downcast="integer")
    return daily.drop(columns=[anchor_col])


def get_earn_and_dividends(symbol):
    """Launch Chrome, scrape Zacks earnings and dividends for ``symbol`` and
    expand both to daily frames.

    Requires Selenium 4: the driver is created through a ``Service`` object.

    Args:
        symbol: Ticker typed into the site's search box.

    Returns:
        A tuple ``(dates_earnings, dates_dividends)`` of DataFrames indexed
        by daily ``date``, newest first.

        * ``dates_earnings`` spans the earliest to the latest scraped
          earnings date and has the columns ``surprise_%``,
          ``expected_growth``, ``previous_surprise`` and
          ``days_after_earn_report``.
        * ``dates_dividends`` spans the earliest remaining announcement date
          to today and has the columns ``dividends_change``,
          ``prev_div_change`` and ``days_after_divid_report``. It is an
          empty frame with those columns when no dividend rows remain.
    """
    # Local import: Service is needed only here and is not among the file's
    # top-level imports.
    from selenium.webdriver.chrome.service import Service

    # The driver path goes through Service; the positional form
    # webdriver.Chrome(path) is rejected by current Selenium 4 releases.
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        # A fixed page is loaded only to reach the search box; the requested
        # symbol is then looked up through the site's own search.
        driver.get("https://www.zacks.com/stock/research/CSCO/earnings-calendar")
        search_symbol(symbol, driver)

        # Show up to 100 earnings records. "name" is the value of selenium's
        # By.NAME; find_element_by_name was removed in Selenium 4.3.
        dropdown = driver.find_element(
            "name", "earnings_announcements_earnings_table_length"
        )
        Select(dropdown).select_by_visible_text("100")
        time.sleep(3)

        earnings = find_estim_tables(driver)
        time.sleep(3)
        dividends = find_divid_tables(driver)
    finally:
        # quit() ends the whole session (browser and driver process), also
        # when scraping raised.
        driver.quit()

    earnings = _earnings_features(earnings)
    dividends = _dividend_features(dividends)

    dates_earnings = _expand_to_daily(
        earnings, earnings.index.max(), "date_of_report", "days_after_earn_report"
    )

    if dividends.empty:
        dates_dividends = pd.DataFrame(
            columns=["days_after_divid_report", "dividends_change", "prev_div_change"]
        )
        dates_dividends.index.names = ["date"]
    else:
        dates_dividends = _expand_to_daily(
            dividends, date.today(), "date_announced", "days_after_divid_report"
        )

    return dates_earnings, dates_dividends
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment