Last active
February 5, 2025 00:56
-
-
Save djmm187/43b10d0ff7946e75b45dc3fd4d6a3f4d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import json | |
import pendulum | |
import re | |
import requests | |
from bs4 import BeautifulSoup | |
# Endpoint that get_page() requests for the earnings calendar.
BASE_URL = 'http://www.nasdaq.com/earnings/earnings-calendar.aspx'

# Baseline query-string parameters merged into every request; the ``date``
# slot is a placeholder with no value of its own.
BASE_PARAMS = dict(date=None)
class FormatUtils:
    """Common formatter util for dates, numbers, and strings.

    The ``*_meta`` helpers return a single-entry dict so the result can be
    merged directly into a larger record.
    """

    # Multipliers for the magnitude suffixes understood by expand_currency().
    _MAGNITUDES = {'k': 1000, 'm': 1000000, 'b': 1000000000}

    @staticmethod
    def iso_meta(date, key):
        """Format a date as an ISO-8601 string and wrap it in a dict.

        :param date:
            value to format; a string is stripped and parsed with
            ``pendulum.parse``, any other object is asked for its own
            ``isoformat()`` (``date`` / ``datetime`` objects provide one)
        :param str key:
            key for the returning dict

        :returns:
            ``{key: <iso string>}``; when the value cannot be formatted a
            message is printed and the value is returned unchanged
        """
        try:
            if isinstance(date, str):
                date = pendulum.parse(date.strip()).isoformat()
            else:
                # date/datetime objects already know their ISO form; anything
                # without isoformat() falls through to the handler below.
                date = date.isoformat()
        except Exception:
            # Deliberately best-effort: one unparsable cell must not abort
            # the extraction of the remaining fields.
            print('Date is not in a parsable format')

        return {key: date}

    @staticmethod
    def currency_meta(cur, key):
        """Convert a currency string to a float and wrap it in a dict.

        :param str cur:
            currency string, i.e. '$1.0', '$-9.23', '$1,234.50'
        :param str key:
            key for the returning dict

        :returns:
            ``{key: <float>}``, or ``{key: None}`` (after printing a message)
            when ``cur`` is not a string or is not numeric
        """
        num = None
        try:
            # Drop the dollar sign and thousands separators before parsing.
            num = float(cur.strip().replace('$', '').replace(',', ''))
        except (AttributeError, ValueError):
            # AttributeError covers non-string input such as None.
            print('Currency is not in correct format.')

        return {key: num}

    @staticmethod
    def expand_currency(cur):
        """Expand an abbreviated currency string to a float.

        :param str cur:
            currency string, i.e. '1M', '12B', '12k'

        :returns:
            expanded float value; ``0.0`` when the numeric part is invalid
            or the trailing symbol is not one of k/m/b (case-insensitive);
            ``None`` when ``cur`` is empty
        """
        if not cur:
            return None

        text = str(cur).strip()
        number, notation = text[:-1], text[-1:].lower()
        # An unrecognised suffix maps to 0 so the result collapses to 0.0.
        multiplier = FormatUtils._MAGNITUDES.get(notation, 0)

        try:
            return float(number.replace(',', '')) * multiplier
        except ValueError:
            # Non-numeric text such as 'n/a' is reported as 0 rather than
            # raising, as the contract above promises.
            return 0.0
class NASDAQEarnings(FormatUtils):
    """Basic mappings and extractions for NASDAQ Earnings Schedule.

    Each extractor is a classmethod that receives one table cell and returns
    a dict of the fields found in that cell.

    For raw markup structure, see:
    http://www.nasdaq.com/earnings/earnings-calendar.aspx?date=2016-Aug-04
    """

    # Splits "<name> (<ticker>) Market Cap: $<cap>" into its three parts.
    COMPANY_RE = re.compile(r'(?P<name>.*?) \((?P<ticker>.*?)\) Market Cap: \$(?P<market_cap>.*?)$')

    # Column position (as a string) -> name of the classmethod that parses
    # the cell found at that position.
    markup_map = {
        '0': 'release_time',
        '1': 'company_info',
        '2': 'expected_report_date',
        '3': 'fiscal_quarter_ending',
        '4': 'avg_eps_forecast',
        '5': 'num_ests',
        '6': 'last_year_reporting_date',
        '7': 'last_years_eps'
    }

    @classmethod
    def release_time(cls, markup):
        """Parses out when earnings will be released based on the
        link found in the cell.

        :param markup:
            table cell containing the information

        :returns:
            dict with release time information. ex.
                {"release_time": "pre"}

            Allowed options are:
                - pre
                - post
                - unknown
        """
        release_time = 'unknown'
        anchor = markup.find('a')

        # Compare against None explicitly instead of relying on the
        # truthiness of the element that find() returns.
        if anchor is not None:
            # An anchor without an href yields None; treat it as empty.
            symbol_link = anchor.get('href') or ''

            if 'premarket' in symbol_link:
                release_time = 'pre'

            if 'after-hours' in symbol_link:
                release_time = 'post'

        return {'release_time': release_time}

    @classmethod
    def company_info(cls, markup):
        """Parses out company name, ticker, and market cap at the time of
        the query.

        :param markup:
            table cell containing the information

        :returns:
            dict with name, ticker, market_cap, ex
                {
                    "name": "ABC Corp.",
                    "ticker": "ABCD",
                    "market_cap": 123123123.1
                }
            All three values are ``None`` when the cell has no link or its
            text does not match the expected layout.
        """
        # Always hand back a dict so callers can merge the result blindly.
        missing = {'name': None, 'ticker': None, 'market_cap': None}

        anchor = markup.find('a')
        if anchor is None:
            return missing

        match = cls.COMPANY_RE.search(anchor.text)
        if match is None:
            return missing

        company_meta = match.groupdict()
        try:
            expanded = cls.expand_currency(company_meta['market_cap'])
        except ValueError:
            # Guard against non-numeric caps (e.g. 'n/a'); use the 0 that
            # expand_currency documents as its fallback for invalid input.
            expanded = 0.0
        company_meta['market_cap'] = expanded

        return company_meta

    @classmethod
    def expected_report_date(cls, markup):
        """Parses out the expected report date.

        :param markup:
            table cell containing the information

        :returns:
            dict with expected earnings date, ex
                {"expected_report_date": '2016-07-12T15:22:29.005626-07:00'}
        """
        return cls.iso_meta(
            markup.text,
            'expected_report_date'
        )

    @classmethod
    def fiscal_quarter_ending(cls, markup):
        """Parses out the quarter end date.

        :param markup:
            table cell containing the information

        :returns:
            dict with the quarter end date, ex
                {"fiscal_quarter_ending": '2016-07-12T15:22:29.005626-07:00'}
        """
        return cls.iso_meta(
            markup.text,
            'fiscal_quarter_ending'
        )

    @classmethod
    def avg_eps_forecast(cls, markup):
        """Parses out the avg EPS forecast.

        :param markup:
            table cell containing the information

        :returns:
            dict with avg EPS forecast information, ex
                {"avg_eps_forecast": 2.1}
        """
        return cls.currency_meta(
            markup.text,
            'avg_eps_forecast'
        )

    @classmethod
    def num_ests(cls, markup):
        """Returns the total number of firms that made estimates
        publicly available.

        :param markup:
            table cell containing the information

        :returns:
            dict with estimate information, ex
                {"num_ests": 2}
            The value is ``None`` when the cell text is not an integer.
        """
        try:
            count = int(markup.text.strip())
        except ValueError:
            # Non-numeric cells must not abort the whole row.
            count = None

        return {'num_ests': count}

    @classmethod
    def last_year_reporting_date(cls, markup):
        """Parses out the last years reporting date.

        :param markup:
            table cell containing the information

        :returns:
            dict with last years reporting date, ex
                {"last_year_reporting_date": '2016-07-12T15:22:29.005626-07:00'}
        """
        return cls.iso_meta(
            markup.text,
            'last_year_reporting_date'
        )

    @classmethod
    def last_years_eps(cls, markup):
        """Parses out the last years EPS.

        :param markup:
            table cell containing the information

        :returns:
            dict with last years EPS, ex
                {"last_years_eps": 2.12}
        """
        return cls.currency_meta(
            markup.text,
            'last_years_eps'
        )
def format_date(day):
    """Take a date and format it like `2017-Jul-28`.

    :param day:
        Target date for companies with pre and post market earnings
        releases. Either a string that ``pendulum.parse`` understands
        (ex. ISO) or an object that already provides ``strftime``
        (``date`` / ``datetime``).

    :returns:
        str with the date formatted as `%Y-%b-%d`

    :raises ValueError:
        propagated from ``pendulum.parse`` when a string cannot be parsed
    """
    if hasattr(day, 'strftime'):
        # date/datetime objects need no parsing.
        moment = day
    else:
        moment = pendulum.parse(day)

    # strftime is used rather than pendulum's own .format() because the
    # meaning of the pattern passed to .format() differs between pendulum
    # releases, while strftime always follows the stdlib directives.
    # NOTE: %b is rendered according to the process locale.
    return moment.strftime('%Y-%b-%d')
def get_page(day, timeout=30):
    """Get the raw HTML of the page containing earnings.

    :param str day:
        formatted date string for target date
    :param timeout:
        seconds to wait for the server before ``requests`` gives up;
        forwarded unchanged to ``requests.get``

    :returns:
        str format of raw html, or ``None`` when the response status is
        not OK
    """
    # Defaults first, then the caller's date: the previous order let the
    # ``None`` placeholder in BASE_PARAMS overwrite the requested day, so no
    # date was ever sent.
    params = {**BASE_PARAMS, 'date': day}

    resp = requests.get(BASE_URL, params=params, timeout=timeout)
    if not resp.ok:
        return None

    return resp.text
def get_companies(page):
    """Take the raw html page as a string, parse it with BeautifulSoup,
    and extract the earnings schedule for each company.

    :param str page:
        raw html page; ``None`` or an empty string yields no companies

    :returns:
        list of basic company dicts containing earnings schedule and
        basic est data; empty when there is no page or the page has no
        ``USMN_EarningsCalendar`` table
    """
    if not page:
        # Nothing was fetched, so there is nothing to parse.
        return []

    soup = BeautifulSoup(page, 'html.parser')
    tbl = soup.find('table', class_='USMN_EarningsCalendar')
    if tbl is None:
        # find() returns None when the calendar table is absent.
        return []

    # The first row is skipped as the table header.
    return [extract_meta(row) for row in tbl.find_all('tr')[1:]]
def extract_meta(row):
    """Grabs the page mapping and extracts all posted company earnings
    information based on the map.

    :param row:
        table row containing information about a specific company

    :returns:
        dict containing the extracted information; fields whose cell is
        missing from the row are omitted

        sample output:
            {
                "release_time": "pre",
                "name": "Zimmer Biomet Holdings, Inc.",
                "ticker": "ZBH",
                "market_cap": 25950000000.0,
                "expected_report_date": "2017-07-27T00:00:00+00:00",
                "fiscal_quarter_ending": "2017-06-27T00:00:00+00:00",
                "avg_eps_forecast": 2.1,
                "num_ests": 15,
                "last_year_reporting_date": "2016-07-28T00:00:00+00:00",
                "last_years_eps": 2.02
            }
    """
    company = {}
    cells = row.find_all('td')

    for position, extractor_name in NASDAQEarnings.markup_map.items():
        idx = int(position)
        if idx >= len(cells):
            # Short rows simply lack the trailing fields.
            continue

        info = getattr(NASDAQEarnings, extractor_name)(cells[idx])
        if info:
            # An extractor may return nothing usable; merge only real data.
            company.update(info)

    return company
if __name__ == '__main__':
    # Command-line entry point: print the earnings calendar for one day as
    # indented JSON.
    parser = argparse.ArgumentParser(description='Earnings Dates')
    parser.add_argument('-d', '--date', dest='earnings_date',
                        # Without a default, omitting -d handed None to
                        # format_date(), which cannot parse it.
                        default=pendulum.now().to_date_string(),
                        help='Enter a date to return companies ' +
                             'with earnings that day. Defaults to today.')

    args = parser.parse_args()
    day = format_date(args.earnings_date)
    page = get_page(day)
    if page is None:
        # get_page() returns None when the response status is not OK; exit
        # with a message on stderr instead of crashing inside the parser.
        raise SystemExit('Unable to fetch the earnings calendar for ' + day)

    print(json.dumps(get_companies(page), indent=2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment