# Source: GitHub gist by @djmm187 (gist 43b10d0ff7946e75b45dc3fd4d6a3f4d).
# Last active February 5, 2025 00:56.
import argparse
import json
import pendulum
import re
import requests
from bs4 import BeautifulSoup
# Address of the NASDAQ earnings calendar page requested by get_page().
BASE_URL = 'http://www.nasdaq.com/earnings/earnings-calendar.aspx'

# Query-string parameters that get_page() merges into each request;
# 'date' has no value here and is meant to be supplied per call.
BASE_PARAMS = dict(date=None)
class FormatUtils:
    """Common formatter utils for dates, numbers, and strings.
    """

    @staticmethod
    def iso_meta(date, key):
        """Normalises a date to ISO-8601 and wraps it in a single-key dict.

        :param date:
            the value to normalise
            :str date: parsed with ``pendulum.parse`` after stripping
                surrounding whitespace
            :date date: / :datetime date: any object exposing
                ``isoformat()`` is serialised through that method
        :param str key:
            key for the returning dict
        :returns:
            ``{key: <iso string>}``; when the value cannot be parsed a
            message is printed and the original value is returned
            unchanged under ``key``
        """
        # Date-like objects have no ``strip``; serialise them directly
        # instead of falling into the failure path below.
        if hasattr(date, 'isoformat'):
            return {key: date.isoformat()}

        try:
            date = pendulum.parse(date.strip()).isoformat()
        except Exception:
            # Deliberate best effort: scraped cells often hold placeholders
            # such as 'n/a', and one bad cell must not abort the whole row.
            print('Date is not in a parsable format')
        return {key: date}

    @staticmethod
    def currency_meta(cur, key):
        """Converts a currency string to a float wrapped in a single-key dict.

        :param str cur:
            currency string, i.e. '$1.0', '$-9.23', '$1,234.50'
        :param str key:
            key for the returning dict
        :returns:
            ``{key: <float>}``, or ``{key: None}`` (after printing a
            message) when the value is not numeric
        """
        num = None
        try:
            # ``str()`` lets None/numeric input reach the ValueError path
            # instead of raising AttributeError on ``.strip``; commas are
            # thousands separators that ``float`` would reject.
            cleaned = str(cur).strip().replace('$', '').replace(',', '')
            num = float(cleaned)
        except ValueError:
            print('Currency is not in correct format.')
        return {key: num}

    @staticmethod
    def expand_currency(cur):
        """Expands an abbreviated amount such as '1M' to a plain float.

        :param str cur:
            amount with an optional magnitude suffix (case-insensitive):
            'k' thousand, 'm' million, 'b' billion, 't' trillion;
            i.e. '1M', '12B', '12k', '1,200M', '123'
        :returns:
            - ``None`` when ``cur`` is empty or only whitespace
            - the expanded float when the amount is valid (a value with no
              suffix is returned as-is)
            - ``0.0`` when the number or the suffix is not recognised
        """
        if not cur:
            return None

        text = str(cur).strip().replace(',', '')
        if not text:
            return None

        multipliers = {
            'k': 1000,
            'm': 1000000,
            'b': 1000000000,
            't': 1000000000000,
        }
        suffix = text[-1].lower()
        if suffix in multipliers:
            number, multiplier = text[:-1], multipliers[suffix]
        else:
            # No known suffix: treat the whole text as the number so that a
            # plain '123' is not collapsed to 0; an unknown symbol makes
            # ``float`` fail and is reported as 0.0 below.
            number, multiplier = text, 1

        try:
            return float(number) * multiplier
        except ValueError:
            return 0.0
class NASDAQEarnings(FormatUtils):
    """Basic mappings and extractions for NASDAQ Earnings Schedule.

    Each extractor takes one table cell and returns a dict fragment;
    ``markup_map`` ties a column position to the extractor of that name.

    For raw markup structure, see:
    http://www.nasdaq.com/earnings/earnings-calendar.aspx?date=2016-Aug-04
    """

    # Splits "<name> (<ticker>) Market Cap: $<market_cap>" into named groups.
    COMPANY_RE = re.compile(
        r'(?P<name>.*?) \((?P<ticker>.*?)\) Market Cap: \$(?P<market_cap>.*?)$'
    )

    # Column index (as a string) -> name of the classmethod that parses it.
    markup_map = {
        '0': 'release_time',
        '1': 'company_info',
        '2': 'expected_report_date',
        '3': 'fiscal_quarter_ending',
        '4': 'avg_eps_forecast',
        '5': 'num_ests',
        '6': 'last_year_reporting_date',
        '7': 'last_years_eps'
    }

    @classmethod
    def release_time(cls, markup):
        """Parses out when earnings will be released, based on the link
        carried by the cell's anchor.

        :param markup:
            table cell element exposing ``find()``
        :returns:
            dict with release time information. ex.
                {"release_time": 'pre'}
            Allowed options are:
                - pre
                - post
                - unknown
        """
        release_time = 'unknown'
        anchor = markup.find('a')
        # An anchor without an href yields None; treat it as "no hint"
        # rather than failing on the substring checks.
        symbol_link = (anchor.get('href') or '') if anchor else ''
        if 'premarket' in symbol_link:
            release_time = 'pre'
        if 'after-hours' in symbol_link:
            release_time = 'post'
        return {'release_time': release_time}

    @classmethod
    def company_info(cls, markup):
        """Parses out company name, ticker, and market cap at the time of
        the query.

        :param markup:
            table cell element exposing ``find()``
        :returns:
            dict with name, ticker, market_cap, ex
                {
                    "name": "ABC Corp.",
                    "ticker": "ABCD",
                    "market_cap": 123123123.1
                }
            All three values are ``None`` when the cell has no anchor or
            its text does not match ``COMPANY_RE``.
        """
        anchor = markup.find('a')
        match = cls.COMPANY_RE.search(anchor.text) if anchor else None
        if match is None:
            # Always hand back a dict so callers can merge the result
            # without a None check.
            return {'name': None, 'ticker': None, 'market_cap': None}

        company_meta = match.groupdict()
        company_meta['market_cap'] = cls.expand_currency(
            company_meta['market_cap']
        )
        return company_meta

    @classmethod
    def expected_report_date(cls, markup):
        """Parses out the expected report date.

        :param markup:
            table cell element exposing ``text``
        :returns:
            dict with expected earnings date, ex
                {"expected_report_date": '2016-07-12T15:22:29.005626-07:00'}
        """
        return cls.iso_meta(
            markup.text,
            'expected_report_date'
        )

    @classmethod
    def fiscal_quarter_ending(cls, markup):
        """Parses out the quarter end date.

        :param markup:
            table cell element exposing ``text``
        :returns:
            dict with the quarter end date, ex
                {"fiscal_quarter_ending": '2016-07-12T15:22:29.005626-07:00'}
        """
        return cls.iso_meta(
            markup.text,
            'fiscal_quarter_ending'
        )

    @classmethod
    def avg_eps_forecast(cls, markup):
        """Parses out the avg EPS forecast.

        :param markup:
            table cell element exposing ``text``
        :returns:
            dict with avg EPS forecast information, ex
                {"avg_eps_forecast": 2.1}
        """
        return cls.currency_meta(
            markup.text,
            'avg_eps_forecast'
        )

    @classmethod
    def num_ests(cls, markup):
        """Returns the total number of firms that made estimates
        publicly available.

        :param markup:
            table cell element exposing ``text``
        :returns:
            dict with estimate information, ex
                {"num_ests": 2}
            The value is ``None`` when the cell text is not an integer.
        """
        try:
            count = int(markup.text.strip())
        except ValueError:
            # Mirror currency_meta: a non-numeric cell becomes None instead
            # of aborting the whole row.
            count = None
        return {'num_ests': count}

    @classmethod
    def last_year_reporting_date(cls, markup):
        """Parses out the last years reporting date.

        :param markup:
            table cell element exposing ``text``
        :returns:
            dict with last years reporting date, ex
                {"last_year_reporting_date": '2016-07-12T15:22:29.005626-07:00'}
        """
        return cls.iso_meta(
            markup.text,
            'last_year_reporting_date'
        )

    @classmethod
    def last_years_eps(cls, markup):
        """Parses out the last years EPS.

        :param markup:
            table cell element exposing ``text``
        :returns:
            dict with last years EPS, ex
                {"last_years_eps": 2.12}
        """
        return cls.currency_meta(
            markup.text,
            'last_years_eps'
        )
def format_date(day):
    """Takes a date and formats it like `2017-Jul-28`.

    :param day:
        Target date for companies with pre and post market earnings
        releases.
        :str day: date string understood by ``pendulum.parse``, ex. ISO
        :date day: Date object
        :datetime day: Datetime Object
    :returns:
        str with the date formatted as `%Y-%b-%d`
    :raises ValueError:
        propagated from ``pendulum.parse`` when a string cannot be parsed
    """
    # Date-like objects can be formatted directly; only strings need parsing.
    moment = day if hasattr(day, 'strftime') else pendulum.parse(day)
    # strftime is the standard datetime API for %-directives; pendulum's own
    # format() uses a different token syntax in newer releases.
    return moment.strftime('%Y-%b-%d')
def get_page(day):
    """Gets the raw HTML of the page containing earnings.

    :param str day:
        formatted date string for target date
    :returns:
        str format of raw html, or ``None`` when the response status is
        not OK
    """
    # Defaults first, then the explicit date: the previous order let
    # BASE_PARAMS['date'] (None) overwrite the requested day, so the date
    # was never sent.
    params = {
        **BASE_PARAMS,
        'date': day
    }
    # Bound the wait so a stalled connection cannot hang the caller forever.
    resp = requests.get(BASE_URL, params=params, timeout=30)
    return resp.text if resp.ok else None
def get_companies(page):
    """Takes the raw html page as a string, parses it using
    BeautifulSoup, and extracts the earnings schedule for each company.

    :param str page:
        raw html page; ``None`` or an empty string is accepted
    :returns:
        list of basic company objects containing earnings schedule and
        basic est data; empty when there is no page or the page holds no
        earnings table
    """
    # get_page() returns None on a failed request.
    if not page:
        return []

    soup = BeautifulSoup(page, 'html.parser')
    tbl = soup.find('table', class_='USMN_EarningsCalendar')
    if tbl is None:
        # No earnings table in the markup: nothing to extract.
        return []

    # The first row is skipped as the header.
    return [extract_meta(row) for row in tbl.find_all('tr')[1:]]
def extract_meta(row):
    """Grabs the page mapping and extracts all posted company earnings
    information based on the map.

    :param row:
        table row element (exposing ``find_all()``) describing a
        specific company
    :returns:
        dict containing the extracted information; fields whose column is
        absent from the row are omitted
        sample output:
            {
                "release_time": "pre",
                "name": "Zimmer Biomet Holdings, Inc.",
                "ticker": "ZBH",
                "market_cap": 25950000000.0,
                "expected_report_date": "2017-07-27T00:00:00+00:00",
                "fiscal_quarter_ending": "2017-06-27T00:00:00+00:00",
                "avg_eps_forecast": 2.1,
                "num_ests": 15,
                "last_year_reporting_date": "2016-07-28T00:00:00+00:00",
                "last_years_eps": 2.02
            }
    """
    company = {}
    cells = row.find_all('td')
    for position, extractor_name in NASDAQEarnings.markup_map.items():
        idx = int(position)
        if idx >= len(cells):
            # Short row (fewer columns than the map expects): skip the field
            # instead of raising IndexError.
            continue
        info = getattr(NASDAQEarnings, extractor_name)(cells[idx])
        # An extractor may yield nothing (None or an empty dict) for a cell
        # it cannot parse.
        if info:
            company.update(info)
    return company
if __name__ == '__main__':
    # CLI entry point: fetch the earnings calendar for one date and print
    # the extracted companies as JSON.
    parser = argparse.ArgumentParser(description='Earnings Dates')
    # The date is mandatory: without it format_date() would be handed None
    # and fail with an unhelpful traceback.
    parser.add_argument('-d', '--date', dest='earnings_date', required=True,
                        help='Enter a date to return companies ' +
                             'with earnings that day.')
    args = parser.parse_args()
    day = format_date(args.earnings_date)
    page = get_page(day)
    if page is None:
        # get_page() returns None when the response status is not OK.
        parser.exit(
            1,
            'Unable to retrieve the earnings calendar for {}\n'.format(day)
        )
    print(json.dumps(get_companies(page), indent=2))
# (GitHub page footer: Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.)