Skip to content

Instantly share code, notes, and snippets.

@KWMalik
Forked from ipl31/improved_tutorial.py
Created November 24, 2012 17:02
Show Gist options
  • Save KWMalik/4140521 to your computer and use it in GitHub Desktop.
Save KWMalik/4140521 to your computer and use it in GitHub Desktop.
Improved Tutorial 9 to be a little more pythonic and also track tickers with matched events
'''
(c) 2011, 2012 Georgia Tech Research Corporation
This source code is released under the New BSD license. Please see
http://wiki.quantsoftware.org/index.php?title=QSTK_License
for license details.
Created on March, 5, 2012
@author: Sourabh Bajaj
@contact: sourabhbajaj90@gmail.com
@summary: Event Profiler Tutorial
'''
# Native python modules
import argparse
import copy
import datetime as dt
# Third party modules
import matplotlib.pyplot as plt
import numpy as np
# QSTK modules
import qstkutil.qsdateutil as du
from qstkutil import DataAccess as da
import kvEventProfiler2 as ep
"""
Accepts a list of symbols along with a start and end date.
Returns the Event Matrix (a pandas DataMatrix) together with the list of
symbols for which at least one event was detected.
Event matrix has the following structure :
|IBM |GOOG|XOM |MSFT| GS | JP |
(d1)|nan |nan | 1 |nan |nan | 1 |
(d2)|nan | 1 |nan |nan |nan |nan |
(d3)| 1 |nan | 1 |nan | 1 |nan |
(d4)|nan | 1 |nan | 1 |nan |nan |
...................................
...................................
Also, d1 = start date
nan = no information about any event.
1 = status bit (positively confirms the event occurrence)
"""
# Module-level settings describing where price data comes from.
storename = "Yahoo" # name of the daily prices data source
# Available field names: open, close, high, low, actual_close, volume
# findEvents passes this field name to DataAccess.get_data.
closefield = "actual_close"
def findEvents(symbols, startday, endday, marketSymbol, verbose=False,
               price_threshold=5.00):
    """Flag every day on which a symbol's close drops below a threshold.

    An event is recorded for a symbol at row ``i`` when its close at row
    ``i`` is below ``price_threshold`` while its close at row ``i - 1`` was
    at or above it.

    Args:
        symbols: Iterable of ticker symbols to load and scan.
        startday: Start of the study window, forwarded to ``getNYSEdays``.
        endday: End of the study window, forwarded to ``getNYSEdays``.
        marketSymbol: Accepted for interface compatibility; this function
            does not use it.
        verbose: When true, print progress and every detected event.
        price_threshold: Price level whose downward crossing counts as an
            event. Defaults to 5.00.

    Returns:
        A tuple ``(event_matrix, symbols_matching)``. ``event_matrix`` has
        the same labels as the price data returned by ``get_data`` and is
        NaN everywhere except for 1.0 at each detected event.
        ``symbols_matching`` lists, in scan order and without duplicates,
        the symbols that produced at least one event.
    """
    # 16:00 offset handed to getNYSEdays (presumably the market close --
    # confirm against qsdateutil).
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess(storename)

    if verbose:
        print("Reading data for {0} to {1}".format(startday, endday))

    close = dataobj.get_data(timestamps, symbols, closefield)
    # Fill gaps forward first, then backward so leading gaps are covered too.
    close = close.fillna(method='ffill').fillna(method='backfill')

    # Multiplying by NaN yields a new, identically labelled, all-NaN matrix
    # in one vectorized step instead of a per-cell Python loop.
    np_eventmat = close * np.nan

    if verbose:
        print("Finding events for {0} to {1}".format(startday, endday))

    # Symbols that produced at least one event, in scan order.
    symbols_matching = []
    for symbol in symbols:
        prices = close[symbol]
        has_event = False
        # Start at 1: every row is compared with the row before it.
        for i in range(1, len(prices)):
            today = prices[i]
            yesterday = prices[i - 1]
            if today < price_threshold and yesterday >= price_threshold:
                if verbose:
                    print("Event detected for {0} close was {1} vs "
                          "previous close of {2}".format(symbol,
                                                         today,
                                                         yesterday))
                # Overwrite the NaN with the status bit marking the event.
                np_eventmat[symbol][i] = 1.0
                has_event = True
        # Keep track of all the symbols that fulfil the condition; the
        # membership test guards against duplicates in ``symbols``.
        if has_event and symbol not in symbols_matching:
            symbols_matching.append(symbol)

    # Return our event matrix and the symbols that matched the event filter.
    return np_eventmat, symbols_matching
################ MAIN CODE ######################
# Only run this code if we are calling this script directly
if __name__ == "__main__":
    # Script entry point: build event matrices for two S&P 500 symbol
    # lists, optionally run the event profiler on each and save the plot,
    # then report which symbols had events in both lists or in only one.

    # Parse cli arguments
    parser = argparse.ArgumentParser(description='Tutorial 9 arguments')
    parser.add_argument('--skip_ep',
                        dest='skip_ep',
                        action='store_true',
                        help="This flag indicates we want to skip the event profiler stage",
                        default=False)
    args = parser.parse_args()

    # Date constraints shared by every symbol list
    startday = dt.datetime(2008, 1, 1)
    endday = dt.datetime(2009, 12, 31)

    # Symbol for market benchmark
    market_symbol = 'SPY'

    # Create a DataAccess instance
    dataobj = da.DataAccess('Yahoo')

    # Symbol list and plot color for each S&P 500 membership year
    spy = {2012: {'symbols': dataobj.get_symbols_from_list("sp5002012"),
                  'PlotColor': '#0000FF'},
           2008: {'symbols': dataobj.get_symbols_from_list("sp5002008"),
                  'PlotColor': '#FF0000'}}

    myplt = plt
    myplt.clf()

    for year in spy:
        symbols = spy[year]['symbols']
        plot_color = spy[year]['PlotColor']

        # We need the market benchmark ticker in our list
        if market_symbol not in symbols:
            symbols.append(market_symbol)

        # Get our event matrix and a list of symbols that triggered events
        eventMatrix, matching_symbols = findEvents(symbols,
                                                   startday,
                                                   endday,
                                                   marketSymbol=market_symbol,
                                                   verbose=True)

        # Add our matching symbols to the spy dictionary
        spy[year]['matches'] = matching_symbols

        # Skip the rest of this loop iteration if we want to skip
        # event profiling
        if args.skip_ep:
            continue

        # Create a new event profiler instance and pass it our plot object
        # so every year is drawn onto the same figure
        eventProfiler = ep.EventProfiler(eventMatrix,
                                         startday,
                                         endday,
                                         lookback_days=20,
                                         lookforward_days=20,
                                         verbose=True)
        returned_plt = eventProfiler.study(plt=myplt,
                                           plotErrorBars=True,
                                           plotMarketNeutral=True,
                                           plotEvents=False,
                                           marketSymbol=market_symbol,
                                           PlotColor=plot_color)

    if not args.skip_ep:
        # Only attempt to save if we did NOT skip the event profiler
        # Save our plot to a pdf
        returned_plt.savefig('MyPlot.pdf', format='pdf')

    # HACK: the years are hard coded; the messages below assume exactly
    # these two symbol lists.
    matches_2008 = set(spy[2008]['matches'])
    matches_2012 = set(spy[2012]['matches'])
    # Intersection: symbols with events in both lists
    common_stocks = matches_2008 & matches_2012
    # Symmetric difference (xor): symbols with events in only one list
    diff_stocks = matches_2008 ^ matches_2012

    print("Stocks with events that were in the SP500 both years: ")
    print(common_stocks)
    print("Stocks with events that were not in the SP500 both years:")
    print(diff_stocks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment