Skip to content

Instantly share code, notes, and snippets.

@rwev
Created December 16, 2018 04:29
Show Gist options
  • Save rwev/f2791e4810a7677e764d7b9219c310b0 to your computer and use it in GitHub Desktop.
Save rwev/f2791e4810a7677e764d7b9219c310b0 to your computer and use it in GitHub Desktop.
Python web-scraper for economic events on the Bloomberg Econoday calendar.
# -*- coding: utf-8 -*-
"""
ECONEV.PY: Economic Events
Python web-scraper for economic events on the Bloomberg Econoday calendar.
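Saves the result of the scrape to plain text for flexible processing by other applications.
Note: the site lists times in Eastern Time; the script subtracts one hour, so the saved times are in Central Time.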
Author: rwev (https://github.com/rwev)
Requires Python 2 (the script uses urllib2) and the Beautiful Soup web parser:
>pip install bs4
See usage:
>python econev.py --help
An example command, its console output, and the resulting file are shown at the end of this file.
"""
from bs4 import BeautifulSoup
import urllib2
import datetime
import sys, os
import unicodedata
from optparse import OptionParser
class EventScraper(object):
    """Scrape economic events, one calendar day at a time, from Econoday.

    Typical use: create an instance, queue the days of interest with one of
    getNumberOfDays / getDateRanges / getToday / getTomorrow, then call
    getNextEvent repeatedly until it returns None.

    Attributes:
        base_url: Prefix of the per-day calendar page; the day, month and
            year query parameters are appended to it.
        hdr: HTTP headers sent with every request (a browser User-Agent).
        dates: Midnight datetimes of the days queued for scraping.
        current_date_index: Index into `dates` of the next day to fetch.
        queued_events: (name, datetime) tuples fetched but not yet handed
            out; set to None once every queued day has been consumed.
    """

    def __init__(self):
        self.base_url = 'http://us.econoday.com/byday.asp?'
        sys.stdout.write('\tBase URL beg: %s\n' % self.base_url)
        self.hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36'}
        sys.stdout.write('\tUsing user agent: %s\n' % self.hdr['User-Agent'])
        self.current_date_index = 0
        self.queued_events = []
        self.dates = []

    @staticmethod
    def _atMidnight(moment):
        """Return `moment` truncated to 00:00:00 of the same calendar day."""
        return datetime.datetime(moment.year, moment.month, moment.day)

    @staticmethod
    def _toCentralTime(day, time_text):
        """Combine `day` with a 12-hour Eastern clock string, shifted to Central.

        Args:
            day: datetime whose year/month/day identify the calendar page.
            time_text: Text such as '8:30 AM ET' or '12:00 PM ET'; only the
                hour, the two minute digits and the AM/PM marker are read.

        Returns:
            A naive datetime one hour earlier than the Eastern time given.
        """
        text = time_text.strip()
        colon = text.index(':')
        hour = int(text[:colon])
        minute = int(text[colon + 1:colon + 3])
        if 'PM' in text and hour != 12:
            hour += 12
        elif 'AM' in text and hour == 12:
            # 12:xx AM is the first hour of the day, not noon.
            hour = 0
        eastern = datetime.datetime(day.year, day.month, day.day, hour, minute)
        # timedelta arithmetic (rather than hour - 1) rolls back to the
        # previous day instead of producing an invalid hour of -1.
        return eastern - datetime.timedelta(hours=1)

    def getNumberOfDays(self, num_days = 60):
        """Queue `num_days` consecutive days, starting with today."""
        sys.stdout.write('\tGetting dates for %s ahead...' % num_days)
        first_day = self._atMidnight(datetime.datetime.now())
        for offset in range(num_days):
            self.dates.append(first_day + datetime.timedelta(days=offset))
        sys.stdout.write(' done\n')

    def getDateRanges(self, YYYYMMDDstr1, YYYYMMDDstr2):
        """Queue every day from YYYYMMDDstr1 through YYYYMMDDstr2 inclusive.

        Raises:
            ValueError: if either string is not a valid YYYYMMDD date, if
                either date lies more than one day in the past, or if the
                start is after the end.
        """
        sys.stdout.write('\tGetting dates for range  %s  -  %s ...'
                         % (YYYYMMDDstr1, YYYYMMDDstr2))
        dt_beg = datetime.datetime.strptime(YYYYMMDDstr1, '%Y%m%d')
        dt_end = datetime.datetime.strptime(YYYYMMDDstr2, '%Y%m%d')
        earliest = datetime.datetime.now() - datetime.timedelta(days=1)
        if dt_beg < earliest or dt_end < earliest or dt_beg > dt_end:
            raise ValueError('Invalid date range specified.')
        # strptime with a date-only format already yields midnight values.
        for offset in range((dt_end - dt_beg).days + 1):
            self.dates.append(dt_beg + datetime.timedelta(days=offset))
        sys.stdout.write(' done\n')

    def getToday(self):
        """Queue today's date (stored at midnight, like the other queue methods)."""
        self.dates.append(self._atMidnight(datetime.datetime.now()))

    def getTomorrow(self):
        """Queue tomorrow's date (stored at midnight, like the other queue methods)."""
        tomorrow = datetime.datetime.now() + datetime.timedelta(days=1)
        self.dates.append(self._atMidnight(tomorrow))

    def getNextEvent(self):
        """Return the next (name, datetime) tuple, or None when exhausted.

        Days are fetched lazily: another page is downloaded only when the
        queue of already-parsed events is empty. Days without events are
        skipped transparently.
        """
        while self.queued_events is not None and not self.queued_events:
            self.queued_events = self.getNextDaysEvents()
        if self.queued_events is None:
            return None
        return self.queued_events.pop(0)

    def getNextDaysEvents(self):
        """Download and parse the calendar page of the next queued day.

        Returns:
            A list of (event name, event datetime) tuples, possibly empty,
            or None when every queued day has already been fetched.
        """
        if self.current_date_index >= len(self.dates):
            return None
        dt = self.dates[self.current_date_index]
        curr_url = '%sday=%d&month=%d&year=%d' % (
            self.base_url, dt.day, dt.month, dt.year)
        sys.stdout.write('\tRetrieving events on %s [Full URL: %s ]\n\n'
                         % (dt, curr_url))

        req = urllib2.Request(curr_url, headers = self.hdr)
        response = urllib2.urlopen(req)
        try:
            page = response.read()
        finally:
            # Release the connection even if reading fails part-way.
            response.close()
        sys.stdout.write('\tSite successfully opened and read.\n')

        soup = BeautifulSoup(page, 'html.parser')
        evtInformation = []
        for row in soup.find_all('tr', class_ = "dailyeventtext"):
            cells = row.find_all('td')
            if len(cells) < 3:
                continue
            evtTime = cells[0].get_text()
            # Rows whose first cell holds no clock time are not timed events.
            if ':' not in evtTime:
                continue
            links = cells[2].find_all('a')
            # Fall back to the cell's own text when the name is not a link.
            evtName = links[0].get_text() if links else cells[2].get_text()
            evtInformation.append((evtName, self._toCentralTime(dt, evtTime)))

        # Advance only after a successful fetch so a failed day can be retried.
        self.current_date_index += 1
        return evtInformation
class EventWriter(object):
    """Write scraped events to a tab-separated text file, one event per line.

    Attributes:
        file: The open output stream; closed by writeEvents.
    """

    def __init__(self, filename):
        """Open `filename` for writing, truncating any existing file."""
        # Local import keeps this class self-contained. io.open gives an
        # explicit UTF-8 text stream, so event names containing non-ASCII
        # characters do not raise an encoding error on write.
        import io
        self.file = io.open(filename, 'w+', encoding='utf-8')

    def writeEvents(self, es, write_date_format, write_time_format):
        """Drain every event from `es` into the file, then close the file.

        Args:
            es: Object whose getNextEvent() returns (name, datetime) tuples
                and finally None.
            write_date_format: strftime pattern for the date column.
            write_time_format: strftime pattern for the time column.

        Each line is '<date>\\t<time>\\t<name>', NFKD-normalised (which, for
        example, turns non-breaking spaces into plain spaces). The file is
        closed even if fetching or writing an event raises.
        """
        stamp_format = write_date_format + '\t' + write_time_format
        try:
            while True:
                event_result = es.getNextEvent()
                if event_result is None:
                    break
                (event_name, event_datetime) = event_result
                if isinstance(event_name, bytes):
                    # normalize() and the text stream both need unicode text.
                    event_name = event_name.decode('utf-8')
                line_str = event_datetime.strftime(stamp_format) + u'\t' + event_name
                line_str = unicodedata.normalize("NFKD", line_str)
                sys.stdout.write('\t\tWrite "' + line_str + '" to file...')
                self.file.write(line_str + u'\n')
                sys.stdout.write(' done\n')
            sys.stdout.write('All events written.\n')
            sys.stdout.write('Closing filestream...')
        finally:
            self.file.close()
        sys.stdout.write(' done\n')
def _buildOptionParser():
    """Create the command-line parser for the scraper."""
    parser = OptionParser()
    parser.add_option("--days", action="store_true", dest="use_number_of_days", default=False,
                      help="Use an integer number of future days. Requires --num to specify how many future days to pull")
    parser.add_option("--daterange", action="store_true", dest="use_date_range", default=False,
                      help="Use a range of future days. Requires --datebeg and --dateend")
    parser.add_option("--today", action="store_true", dest="use_today", default=False,
                      help="Use today's date. Requires no auxiliary args")
    parser.add_option("--tomorrow", action="store_true", dest="use_tomorrow", default=False,
                      help="Use tomorrow's date. Requires no auxiliary args")
    parser.add_option("--num", action="store", dest="num_days",type = "int", help="positive integer")
    parser.add_option("--datebeg", action="store", dest="date_beg_str",type = "str", help="format YYYYMMDD")
    parser.add_option("--dateend", action="store", dest="date_end_str",type = "str", help="format YYYYMMDD")
    parser.add_option("-o", "--outfile", action="store", dest="filename", type = "str",
                      help="File to which output will be written. Overwrites existing file of same name in CWD")
    parser.add_option("--dateformat", action="store", dest="write_date_format", type = "str", default = "%Y%m%d",
                      help="Date format string for output. See http://strftime.org/ for options.")
    parser.add_option("--timeformat", action="store", dest="write_time_format", type = "str", default = "%H%M",
                      help="Time format string for output. See http://strftime.org/ for options.")
    return parser


def main(argv=None):
    """Parse the command line, scrape the requested days and write the file.

    Args:
        argv: Argument list without the program name; None means use
            sys.argv[1:].

    Every argument is validated before any scraping object is created, so
    a bad command line exits (status 2, via parser.error) without touching
    the network or the output file. When several mode flags are given, the
    first of --days, --daterange, --today, --tomorrow wins.
    """
    parser = _buildOptionParser()
    (options, args) = parser.parse_args(argv)

    if not options.filename:
        parser.error('Argument -o, --outfile required: must give output filename')
    if not (options.use_number_of_days or options.use_date_range
            or options.use_today or options.use_tomorrow):
        parser.error('Must give one of --days, --daterange, --today, --tomorrow')

    if options.use_number_of_days:
        if not options.num_days:
            parser.error('Must give --num when using --days')
        if options.num_days < 0:
            parser.error('Argument --num must be a positive integer')
    elif options.use_date_range:
        if not options.date_beg_str or not options.date_end_str:
            parser.error('Must give --datebeg and --dateend when using --daterange')

    scraper = EventScraper()
    if options.use_number_of_days:
        scraper.getNumberOfDays(options.num_days)
    elif options.use_date_range:
        try:
            scraper.getDateRanges(options.date_beg_str, options.date_end_str)
        except ValueError as err:
            # Malformed or out-of-range dates are a usage error, not a crash.
            parser.error(str(err))
    elif options.use_today:
        scraper.getToday()
    elif options.use_tomorrow:
        scraper.getTomorrow()

    writer = EventWriter(options.filename)
    writer.writeEvents(scraper, options.write_date_format, options.write_time_format)


if __name__ == '__main__':
    main()
"""
EXAMPLE
>python econev.py -o test.txt --days --num 10
----------
Base URL beg: http://us.econoday.com/byday.asp?
Using user agent: Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36
Getting dates for 10 ahead... done
Retrieving events on 2018-12-15 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=15&month=12&year=2018 ]
Site successfully opened and read.
Retrieving events on 2018-12-16 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=16&month=12&year=2018 ]
Site successfully opened and read.
Retrieving events on 2018-12-17 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=17&month=12&year=2018 ]
Site successfully opened and read.
Write "20181217 0730 Empire State Mfg Survey " to file... done
Write "20181217 0900 Housing Market Index " to file... done
Write "20181217 1030 3-Month Bill Auction" to file... done
Write "20181217 1030 6-Month Bill Auction" to file... done
Write "20181217 1500 Treasury International Capital " to file... done
Retrieving events on 2018-12-18 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=18&month=12&year=2018 ]
Site successfully opened and read.
Write "20181218 0730 Housing Starts " to file... done
Write "20181218 0755 Redbook " to file... done
Write "20181218 1000 4-Week Bill Announcement" to file... done
Write "20181218 1000 8-Week Bill Announcement" to file... done
Retrieving events on 2018-12-19 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=19&month=12&year=2018 ]
Site successfully opened and read.
Write "20181219 0600 MBA Mortgage Applications " to file... done
Write "20181219 0730 Current Account " to file... done
Write "20181219 0900 Existing Home Sales " to file... done
Write "20181219 0930 EIA Petroleum Status Report " to file... done
Write "20181219 1300 FOMC Meeting Announcement " to file... done
Write "20181219 1300 FOMC Forecasts " to file... done
Write "20181219 1330 Fed Chair Press Conference " to file... done
Retrieving events on 2018-12-20 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=20&month=12&year=2018 ]
Site successfully opened and read.
Write "20181220 0730 Jobless Claims " to file... done
Write "20181220 0730 Philadelphia Fed Business Outlook Survey " to file... done
Write "20181220 0900 Leading Indicators " to file... done
Write "20181220 0930 EIA Natural Gas Report " to file... done
Write "20181220 1000 3-Month Bill Announcement" to file... done
Write "20181220 1000 6-Month Bill Announcement" to file... done
Write "20181220 1000 2-Yr FRN Note Announcement" to file... done
Write "20181220 1000 2-Yr Note Announcement" to file... done
Write "20181220 1000 5-Yr Note Announcement" to file... done
Write "20181220 1000 7-Yr Note Announcement" to file... done
Write "20181220 1030 4-Week Bill Auction" to file... done
Write "20181220 1030 8-Week Bill Auction" to file... done
Write "20181220 1200 5-Yr TIPS Auction" to file... done
Write "20181220 1530 Fed Balance Sheet " to file... done
Write "20181220 1530 Money Supply " to file... done
Retrieving events on 2018-12-21 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=21&month=12&year=2018 ]
Site successfully opened and read.
Write "20181221 0730 Durable Goods Orders " to file... done
Write "20181221 0730 GDP " to file... done
Write "20181221 0730 Corporate Profits " to file... done
Write "20181221 0900 Personal Income and Outlays " to file... done
Write "20181221 0900 Consumer Sentiment " to file... done
Write "20181221 1000 Kansas City Fed Manufacturing Index " to file... done
Write "20181221 1200 Baker-Hughes Rig Count " to file... done
Retrieving events on 2018-12-22 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=22&month=12&year=2018 ]
Site successfully opened and read.
Retrieving events on 2018-12-23 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=23&month=12&year=2018 ]
Site successfully opened and read.
Retrieving events on 2018-12-24 00:00:00 [Full URL: http://us.econoday.com/byday.asp?day=24&month=12&year=2018 ]
Site successfully opened and read.
Write "20181224 0730 Chicago Fed National Activity Index " to file... done
Write "20181224 1000 4-Week Bill Announcement" to file... done
Write "20181224 1030 3-Month Bill Auction" to file... done
Write "20181224 1030 6-Month Bill Auction" to file... done
Write "20181224 1200 2-Yr Note Auction" to file... done
All events written.
Closing filestream... done
------------------------------
TEST.TXT
20181217 0730 Empire State Mfg Survey
20181217 0900 Housing Market Index
20181217 1030 3-Month Bill Auction
20181217 1030 6-Month Bill Auction
20181217 1500 Treasury International Capital
20181218 0730 Housing Starts
20181218 0755 Redbook
20181218 1000 4-Week Bill Announcement
20181218 1000 8-Week Bill Announcement
20181219 0600 MBA Mortgage Applications
20181219 0730 Current Account
20181219 0900 Existing Home Sales
20181219 0930 EIA Petroleum Status Report
20181219 1300 FOMC Meeting Announcement
20181219 1300 FOMC Forecasts
20181219 1330 Fed Chair Press Conference
20181220 0730 Jobless Claims
20181220 0730 Philadelphia Fed Business Outlook Survey
20181220 0900 Leading Indicators
20181220 0930 EIA Natural Gas Report
20181220 1000 3-Month Bill Announcement
20181220 1000 6-Month Bill Announcement
20181220 1000 2-Yr FRN Note Announcement
20181220 1000 2-Yr Note Announcement
20181220 1000 5-Yr Note Announcement
20181220 1000 7-Yr Note Announcement
20181220 1030 4-Week Bill Auction
20181220 1030 8-Week Bill Auction
20181220 1200 5-Yr TIPS Auction
20181220 1530 Fed Balance Sheet
20181220 1530 Money Supply
20181221 0730 Durable Goods Orders
20181221 0730 GDP
20181221 0730 Corporate Profits
20181221 0900 Personal Income and Outlays
20181221 0900 Consumer Sentiment
20181221 1000 Kansas City Fed Manufacturing Index
20181221 1200 Baker-Hughes Rig Count
20181224 0730 Chicago Fed National Activity Index
20181224 1000 4-Week Bill Announcement
20181224 1030 3-Month Bill Auction
20181224 1030 6-Month Bill Auction
20181224 1200 2-Yr Note Auction
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment