Created
December 16, 2018 04:29
-
-
Save rwev/f2791e4810a7677e764d7b9219c310b0 to your computer and use it in GitHub Desktop.
Python web-scraper for economic events on the Bloomberg Econoday calendar.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
ECONEV.PY: Economic Events | |
Python web-scraper for economic events on the Bloomberg Econoday calendar. | |
Saves result of scrape to plain text (in Eastern Time) for flexible processing by other applications. | |
Author: rwev (https://github.com/rwev) | |
Require Beautiful Soup web parser: | |
>pip install bs4 | |
See usage: | |
>python econev.py --help | |
An example command, console output, and results, is present at the end of this file. | |
""" | |
import datetime
import io
import os
import sys
import unicodedata
from optparse import OptionParser

try:
    import urllib2  # Python 2
except ImportError:
    import urllib.request as urllib2  # Python 3: same Request/urlopen API

from bs4 import BeautifulSoup
class EventScraper(object):
    """Scrape economic events from the Econoday day-by-day calendar pages.

    Usage: queue the days to scrape with one of ``getNumberOfDays``,
    ``getDateRanges``, ``getToday`` or ``getTomorrow``, then call
    ``getNextEvent`` repeatedly until it returns ``None``.  One page is
    fetched per queued day, lazily, when the previous day's events run out.

    Attributes:
        base_url: URL prefix to which the day/month/year query is appended.
        hdr: HTTP headers sent with every request.
        dates: queued days (datetimes at midnight), in scrape order.
        current_date_index: index into ``dates`` of the next day to fetch.
        queued_events: ``(name, datetime)`` tuples not yet handed out;
            becomes ``None`` once every queued day has been consumed.
    """

    # Clock times on the page are treated as Eastern Time; event datetimes
    # are stored this much earlier, i.e. as Central Time.
    ET_TO_CT_OFFSET = datetime.timedelta(hours=1)

    def __init__(self):
        self.base_url = 'http://us.econoday.com/byday.asp?'
        print('\tBase URL beg: %s' % self.base_url)
        self.hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36'}
        print('\tUsing user agent: %s' % self.hdr['User-Agent'])
        self.current_date_index = 0
        self.queued_events = []
        self.dates = []

    @staticmethod
    def _midnight(moment):
        """Return ``moment`` truncated to 00:00:00 of the same day."""
        return datetime.datetime(moment.year, moment.month, moment.day)

    @staticmethod
    def _event_datetime(day, time_text):
        """Convert a scraped clock string on ``day`` to a Central Time datetime.

        ``time_text`` is expected to look like ``'8:30 AM ET'``.  Returns
        ``None`` when the text does not contain a parseable ``H:MM`` time,
        so that non-time rows are skipped instead of raising.
        """
        head, colon, tail = time_text.strip().partition(':')
        if not colon:
            return None
        try:
            hour = int(head)
            minute = int(tail[:2])
        except ValueError:
            return None
        marker = tail.upper()
        if 'PM' in marker and hour != 12:
            hour += 12
        elif 'AM' in marker and hour == 12:
            # 12:xx AM is the first hour of the day, not noon.
            hour = 0
        try:
            eastern = datetime.datetime(day.year, day.month, day.day,
                                        hour, minute)
        except ValueError:
            return None
        # timedelta arithmetic (rather than hour - 1) lets an early-morning
        # Eastern event roll back onto the previous calendar day.
        return eastern - EventScraper.ET_TO_CT_OFFSET

    def getNumberOfDays(self, num_days=60):
        """Queue ``num_days`` consecutive days, starting with today."""
        first_day = self._midnight(datetime.datetime.now())
        for offset in range(num_days):
            self.dates.append(first_day + datetime.timedelta(days=offset))
        print('\tGetting dates for %s ahead... done' % num_days)

    def getDateRanges(self, YYYYMMDDstr1, YYYYMMDDstr2):
        """Queue every day from ``YYYYMMDDstr1`` to ``YYYYMMDDstr2`` inclusive.

        Raises:
            ValueError: if either string is not a valid ``YYYYMMDD`` date,
                the range starts before today, or it ends before it starts.
        """
        dt_beg = datetime.datetime.strptime(YYYYMMDDstr1, '%Y%m%d')
        dt_end = datetime.datetime.strptime(YYYYMMDDstr2, '%Y%m%d')
        today = self._midnight(datetime.datetime.now())
        if dt_beg < today or dt_beg > dt_end:
            raise ValueError('Invalid date range specified.')
        for offset in range((dt_end - dt_beg).days + 1):
            self.dates.append(dt_beg + datetime.timedelta(days=offset))
        print('\tGetting dates for range  %s  -  %s ... done'
              % (YYYYMMDDstr1, YYYYMMDDstr2))

    def getToday(self):
        """Queue today's date."""
        self.dates.append(self._midnight(datetime.datetime.now()))

    def getTomorrow(self):
        """Queue tomorrow's date."""
        tomorrow = datetime.datetime.now() + datetime.timedelta(days=1)
        self.dates.append(self._midnight(tomorrow))

    def getNextEvent(self):
        """Return the next ``(name, datetime)`` event, or ``None`` when done.

        Days without events are skipped.  Once ``None`` has been returned,
        every later call also returns ``None``.
        """
        if self.queued_events is None:
            return None
        while not self.queued_events:
            self.queued_events = self.getNextDaysEvents()
            if self.queued_events is None:
                return None
        return self.queued_events.pop(0)

    def getNextDaysEvents(self):
        """Fetch and parse the page for the next queued day.

        Returns:
            A list of ``(event name, Central Time datetime)`` tuples (possibly
            empty), or ``None`` when no queued days remain.
        """
        if self.current_date_index >= len(self.dates):
            return None
        day = self.dates[self.current_date_index]
        curr_url = '%sday=%d&month=%d&year=%d' % (
            self.base_url, day.day, day.month, day.year)
        print('\tRetrieving events on %s [Full URL: %s ]' % (day, curr_url))

        request = urllib2.Request(curr_url, headers=self.hdr)
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            response.close()
        print('\tSite successfully opened and read.')

        soup = BeautifulSoup(html, 'html.parser')
        events = []
        for row in soup.find_all('tr', class_='dailyeventtext'):
            cells = row.find_all('td')
            if len(cells) < 3:
                continue  # not a time / flag / name row
            when = self._event_datetime(day, cells[0].get_text())
            if when is None:
                continue  # first cell holds no clock time
            link = cells[2].find('a')
            # Fall back to the whole cell text if the name is not a link.
            name_node = link if link is not None else cells[2]
            events.append((name_node.get_text(), when))

        # Only advance once the page has been fetched and parsed.
        self.current_date_index += 1
        return events
class EventWriter(object):
    """Write scraped events to a tab-separated text file, one per line.

    The output file is opened (and truncated) on construction and closed by
    ``writeEvents``, so each instance is good for a single ``writeEvents``
    call.
    """

    def __init__(self, filename):
        # Explicit UTF-8 so that event names which are still non-ASCII after
        # normalisation are written instead of raising an encoding error.
        self.file = io.open(filename, 'w+', encoding='utf-8')

    def writeEvents(self, es, write_date_format, write_time_format):
        """Drain ``es`` and write every event as ``date<TAB>time<TAB>name``.

        Args:
            es: object whose ``getNextEvent()`` returns ``(name, datetime)``
                tuples and finally ``None``.
            write_date_format: ``strftime`` format for the date column.
            write_time_format: ``strftime`` format for the time column.

        The file is closed when this method exits, including when the
        scraper raises part-way through.
        """
        stamp_format = write_date_format + '\t' + write_time_format
        try:
            while True:
                event_result = es.getNextEvent()
                if event_result is None:
                    break
                event_name, event_datetime = event_result
                line_str = event_datetime.strftime(stamp_format) + '\t' + event_name
                # NFKD folds compatibility characters (e.g. non-breaking
                # spaces in scraped names) into their plain equivalents.
                line_str = unicodedata.normalize('NFKD', line_str)
                self.file.write(line_str + u'\n')
                print('\t\tWrite "' + line_str + '" to file... done')
            print('All events written.')
        finally:
            self.file.close()
        print('Closing filestream... done')
def _build_parser():
    """Return the OptionParser describing the command-line interface."""
    parser = OptionParser()
    parser.add_option("--days", action="store_true", dest="use_number_of_days", default=False,
                      help="Use an integer number of future days. Requires --num to specify how many future days to pull")
    parser.add_option("--daterange", action="store_true", dest="use_date_range", default=False,
                      help="Use a range of future days. Requires --datebeg and --dateend")
    parser.add_option("--today", action="store_true", dest="use_today", default=False,
                      help="Use today's date. Requires no auxiliary args")
    parser.add_option("--tomorrow", action="store_true", dest="use_tomorrow", default=False,
                      help="Use tomorrow's date. Requires no auxiliary args")
    parser.add_option("--num", action="store", dest="num_days", type="int", help="positive integer")
    parser.add_option("--datebeg", action="store", dest="date_beg_str", type="str", help="format YYYYMMDD")
    parser.add_option("--dateend", action="store", dest="date_end_str", type="str", help="format YYYYMMDD")
    parser.add_option("-o", "--outfile", action="store", dest="filename", type="str",
                      help="File to which output will be written. Overwrites existing file of same name in CWD")
    parser.add_option("--dateformat", action="store", dest="write_date_format", type="str", default="%Y%m%d",
                      help="Date format string for output. See http://strftime.org/ for options.")
    parser.add_option("--timeformat", action="store", dest="write_time_format", type="str", default="%H%M",
                      help="Time format string for output. See http://strftime.org/ for options.")
    return parser


def main(argv=None):
    """Parse the command line, scrape the requested days and write the file.

    Args:
        argv: argument list without the program name; ``None`` means
            ``sys.argv[1:]``.

    Invalid or missing options are reported through ``parser.error``, which
    prints the usage message and exits.  When several of --days,
    --daterange, --today and --tomorrow are given, the first in that order
    wins.
    """
    parser = _build_parser()
    options, _ = parser.parse_args(argv)

    # Validate everything before any scraper is built or file is opened.
    if not options.filename:
        parser.error('Argument -o, --outfile required: must give output filename')
    if not (options.use_number_of_days or options.use_date_range
            or options.use_today or options.use_tomorrow):
        parser.error('Must give one of --days, --daterange, --today, --tomorrow')
    if options.use_number_of_days:
        if options.num_days is None:
            parser.error('Must give --num when using --days')
        if options.num_days <= 0:
            parser.error('--num must be a positive integer')
    elif options.use_date_range:
        if not options.date_beg_str or not options.date_end_str:
            parser.error('Must give --datebeg and --dateend when using --daterange')

    scraper = EventScraper()
    if options.use_number_of_days:
        scraper.getNumberOfDays(options.num_days)
    elif options.use_date_range:
        try:
            scraper.getDateRanges(options.date_beg_str, options.date_end_str)
        except ValueError as err:
            # Malformed dates or a bad range are user errors, not crashes.
            parser.error(str(err))
    elif options.use_today:
        scraper.getToday()
    elif options.use_tomorrow:
        scraper.getTomorrow()

    writer = EventWriter(options.filename)
    writer.writeEvents(scraper, options.write_date_format, options.write_time_format)


if __name__ == '__main__':
    main()
""" | |
EXAMPLE | |
>python econev.py -o test.txt --days --num 10 | |
---------- | |
Base URL beg: http://us.econoday.com/byday.asp? | |
Using user agent: Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36 | |
Getting dates for 10 ahead... done | |
Retrieving events on 2018-12-15 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=15&month=12&year=2018 ] | |
Site successfully opened and read. | |
Retrieving events on 2018-12-16 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=16&month=12&year=2018 ] | |
Site successfully opened and read. | |
Retrieving events on 2018-12-17 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=17&month=12&year=2018 ] | |
Site successfully opened and read. | |
Write "20181217 0730 Empire State Mfg Survey " to file... done | |
Write "20181217 0900 Housing Market Index " to file... done | |
Write "20181217 1030 3-Month Bill Auction" to file... done | |
Write "20181217 1030 6-Month Bill Auction" to file... done | |
Write "20181217 1500 Treasury International Capital " to file... done | |
Retrieving events on 2018-12-18 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=18&month=12&year=2018 ] | |
Site successfully opened and read. | |
Write "20181218 0730 Housing Starts " to file... done | |
Write "20181218 0755 Redbook " to file... done | |
Write "20181218 1000 4-Week Bill Announcement" to file... done | |
Write "20181218 1000 8-Week Bill Announcement" to file... done | |
Retrieving events on 2018-12-19 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=19&month=12&year=2018 ] | |
Site successfully opened and read. | |
Write "20181219 0600 MBA Mortgage Applications " to file... done | |
Write "20181219 0730 Current Account " to file... done | |
Write "20181219 0900 Existing Home Sales " to file... done | |
Write "20181219 0930 EIA Petroleum Status Report " to file... done | |
Write "20181219 1300 FOMC Meeting Announcement " to file... done | |
Write "20181219 1300 FOMC Forecasts " to file... done | |
Write "20181219 1330 Fed Chair Press Conference " to file... done | |
Retrieving events on 2018-12-20 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=20&month=12&year=2018 ] | |
Site successfully opened and read. | |
Write "20181220 0730 Jobless Claims " to file... done | |
Write "20181220 0730 Philadelphia Fed Business Outlook Survey " to file... done | |
Write "20181220 0900 Leading Indicators " to file... done | |
Write "20181220 0930 EIA Natural Gas Report " to file... done | |
Write "20181220 1000 3-Month Bill Announcement" to file... done | |
Write "20181220 1000 6-Month Bill Announcement" to file... done | |
Write "20181220 1000 2-Yr FRN Note Announcement" to file... done | |
Write "20181220 1000 2-Yr Note Announcement" to file... done | |
Write "20181220 1000 5-Yr Note Announcement" to file... done | |
Write "20181220 1000 7-Yr Note Announcement" to file... done | |
Write "20181220 1030 4-Week Bill Auction" to file... done | |
Write "20181220 1030 8-Week Bill Auction" to file... done | |
Write "20181220 1200 5-Yr TIPS Auction" to file... done | |
Write "20181220 1530 Fed Balance Sheet " to file... done | |
Write "20181220 1530 Money Supply " to file... done | |
Retrieving events on 2018-12-21 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=21&month=12&year=2018 ] | |
Site successfully opened and read. | |
Write "20181221 0730 Durable Goods Orders " to file... done | |
Write "20181221 0730 GDP " to file... done | |
Write "20181221 0730 Corporate Profits " to file... done | |
Write "20181221 0900 Personal Income and Outlays " to file... done | |
Write "20181221 0900 Consumer Sentiment " to file... done | |
Write "20181221 1000 Kansas City Fed Manufacturing Index " to file... done | |
Write "20181221 1200 Baker-Hughes Rig Count " to file... done | |
Retrieving events on 2018-12-22 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=22&month=12&year=2018 ] | |
Site successfully opened and read. | |
Retrieving events on 2018-12-23 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=23&month=12&year=2018 ] | |
Site successfully opened and read. | |
Retrieving events on 2018-12-24 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=24&month=12&year=2018 ] | |
Site successfully opened and read. | |
Write "20181224 0730 Chicago Fed National Activity Index " to file... done | |
Write "20181224 1000 4-Week Bill Announcement" to file... done | |
Write "20181224 1030 3-Month Bill Auction" to file... done | |
Write "20181224 1030 6-Month Bill Auction" to file... done | |
Write "20181224 1200 2-Yr Note Auction" to file... done | |
All events written. | |
Closing filestream... done | |
------------------------------ | |
TEST.TXT | |
20181217 0730 Empire State Mfg Survey | |
20181217 0900 Housing Market Index | |
20181217 1030 3-Month Bill Auction | |
20181217 1030 6-Month Bill Auction | |
20181217 1500 Treasury International Capital | |
20181218 0730 Housing Starts | |
20181218 0755 Redbook | |
20181218 1000 4-Week Bill Announcement | |
20181218 1000 8-Week Bill Announcement | |
20181219 0600 MBA Mortgage Applications | |
20181219 0730 Current Account | |
20181219 0900 Existing Home Sales | |
20181219 0930 EIA Petroleum Status Report | |
20181219 1300 FOMC Meeting Announcement | |
20181219 1300 FOMC Forecasts | |
20181219 1330 Fed Chair Press Conference | |
20181220 0730 Jobless Claims | |
20181220 0730 Philadelphia Fed Business Outlook Survey | |
20181220 0900 Leading Indicators | |
20181220 0930 EIA Natural Gas Report | |
20181220 1000 3-Month Bill Announcement | |
20181220 1000 6-Month Bill Announcement | |
20181220 1000 2-Yr FRN Note Announcement | |
20181220 1000 2-Yr Note Announcement | |
20181220 1000 5-Yr Note Announcement | |
20181220 1000 7-Yr Note Announcement | |
20181220 1030 4-Week Bill Auction | |
20181220 1030 8-Week Bill Auction | |
20181220 1200 5-Yr TIPS Auction | |
20181220 1530 Fed Balance Sheet | |
20181220 1530 Money Supply | |
20181221 0730 Durable Goods Orders | |
20181221 0730 GDP | |
20181221 0730 Corporate Profits | |
20181221 0900 Personal Income and Outlays | |
20181221 0900 Consumer Sentiment | |
20181221 1000 Kansas City Fed Manufacturing Index | |
20181221 1200 Baker-Hughes Rig Count | |
20181224 0730 Chicago Fed National Activity Index | |
20181224 1000 4-Week Bill Announcement | |
20181224 1030 3-Month Bill Auction | |
20181224 1030 6-Month Bill Auction | |
20181224 1200 2-Yr Note Auction | |
""" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment