Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import urllib2, csv
import matplotlib.pyplot as plt
import datetime
import seaborn
import numpy, scipy.stats, math
# Download the S&P 500 data set and build a total-return index (price change
# plus reinvested dividends), normalised to 1.0 at the first data row.
# NOTE(review): rows are presumably monthly (dividends are divided by 12 below)
# -- confirm against the data set's documentation.
f = urllib2.urlopen('https://raw.githubusercontent.com/datasets/s-and-p-500/master/data/data.csv')
dates = []       # one datetime.date per row consumed
reinvested = []  # total-return index value for the matching entry in `dates`
last = None      # index level of the previous row; None until the first row is read
total = 1.0
try:
    # Use a dedicated name for the reader so the `csv` module is not shadowed.
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for line in reader:
        date, value, dividends = line[:3]
        # Stop at this hard-coded cutoff; this row and later ones are ignored.
        if date == '2016-04-01':
            break
        date = datetime.date(*map(int, date.split('-')))
        value = float(value)
        dividends = float(dividends)
        if last is not None:
            # Growth factor of the index level since the previous row.
            sp_yield = value / last
            # Dividend figure spread over 12 rows, relative to the previous level.
            dv_yield = dividends / last / 12
            total *= (sp_yield + dv_yield)
        last = value
        reinvested.append(total)
        dates.append(date)
finally:
    # Always release the HTTP response, even if parsing fails part-way.
    f.close()
# Draw the total-return index on a logarithmic y-axis and save it as a PNG.
axes = plt.gca()
axes.plot(dates, reinvested)
axes.set_yscale('log')
axes.set_title('S&P 500 total return')
axes.set_ylabel('Index (1870: 1.0)')
plt.gcf().savefig('sp500_return.png')
# For every possible starting row, compare two ways of investing over the same
# horizon: a single lump sum at the start versus equal contributions in each
# period (dollar cost averaging). Each strategy is scored by the internal rate
# of return of its cash-flow stream.
lump_returns = []
dcav_returns = []
n_years = 5
interval = 12*n_years  # number of periods in the horizon
# NOTE(review): numpy.irr was deprecated in NumPy 1.18 and removed in 1.20
# (it moved to the separate numpy_financial package); this needs an older NumPy.
for offset in range(len(reinvested) - interval):
    final = reinvested[offset + interval]
    # Lump sum: pay 1.0 at the start, nothing in between, collect the growth at the end.
    streams_lump = [-1.0] + [0.0] * (interval - 1) + [final / reinvested[offset]]
    # Averaging: pay 1.0 in each of `interval` periods; each contribution grows
    # from its own purchase level to the final level.
    streams_dcav = [-1.0] * interval + [sum(final / r for r in reinvested[offset:offset + interval])]
    # Per-period IRR scaled by 12 and expressed in percent.
    lump_returns.append(numpy.irr(streams_lump) * 12 * 100)
    dcav_returns.append(numpy.irr(streams_dcav) * 12 * 100)
# 1 where the window ended with a positive return, 0 otherwise.
lump_gain = [int(r > 0) for r in lump_returns]
dcav_gain = [int(r > 0) for r in dcav_returns]
# float() forces true division: on Python 2, int / int truncates and the share
# of winning windows would print as 0.
print(float(sum(lump_gain)) / len(lump_gain))
print(float(sum(dcav_gain)) / len(dcav_gain))
print(scipy.stats.ttest_ind(lump_gain, dcav_gain))
# Plot the distribution of returns for both strategies on shared bins and
# annotate the title with the p-value of a Wilcoxon signed-rank test.
plt.clf()  # start from an empty figure
# Bin edges are whole percentages, 2 wide. Round the minimum down and the
# maximum up, and extend the range so the last edge is at or beyond the
# maximum; otherwise the largest returns fall outside every bin and are
# dropped from the histogram.
amin = int(math.floor(numpy.amin((lump_returns, dcav_returns))))
amax = int(math.ceil(numpy.amax((lump_returns, dcav_returns))))
bins = list(range(amin, amax + 2, 2))
seaborn.distplot(lump_returns, label='Lump investment returns (mean=%.2f%%)' % numpy.mean(lump_returns), bins=bins)
seaborn.distplot(dcav_returns, label='Dollar cost averaging returns (mean=%.2f%%)' % numpy.mean(dcav_returns), bins=bins)
# Paired test: both lists hold one return per starting row, in the same order.
s, p = scipy.stats.wilcoxon(lump_returns, dcav_returns)
plt.title('Lump vs dollar cost returns for a %d year horizon (p=%f)' % (n_years, p))
plt.legend(loc=2)
plt.ylabel('Probability')
plt.xlabel('Annual return over %d years (%%)' % n_years)
plt.savefig('lump_vs_dcav.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment