Skip to content

Instantly share code, notes, and snippets.

@ya7ya
Last active April 23, 2019 02:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ya7ya/3dacf63ed4ae6766c00daa8bda189ccc to your computer and use it in GitHub Desktop.
Save ya7ya/3dacf63ed4ae6766c00daa8bda189ccc to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Prices come from Yahoo Finance; pandas-datareader's DataReader downloads them.
from pandas_datareader.data import DataReader
def get_adj_prices(symbols, start_date):
    """Download adjusted closing prices for several ticker symbols.

    Each symbol is fetched from the "yahoo" source via ``DataReader``,
    starting at ``start_date``, and its "Adj Close" series is stored in a
    column named after the symbol.

    Args:
        symbols: iterable of ticker symbols, e.g. ``["PEP", "KO"]``.
        start_date: first date to request, passed straight to ``DataReader``.

    Returns:
        A DataFrame with one "Adj Close" column per symbol, in the order
        the symbols were given.
    """
    prices = pd.DataFrame()
    for ticker in symbols:
        # Column assignment aligns each new series to the frame's index.
        prices[ticker] = DataReader(ticker, "yahoo", start_date)["Adj Close"]
    return prices
def find_corr(df, lagged, lag=1, window=5):
    """Plot how the first column correlates with a lagged copy of another.

    A ``"<lagged>_lag"`` column is built by shifting ``df[lagged]`` by
    ``lag`` rows. The rolling correlation between the first column of the
    frame and that lagged column is shown as a histogram, with a dashed
    line at the left edge of the most populated bin, and that value is
    printed. A second figure shows every column except ``lagged`` (so the
    lagged copy is included), each divided by its first row.

    The caller's ``df`` is not modified.

    Args:
        df: price frame, one column per symbol; column 0 is the reference
            series for the correlation.
        lagged: name of the column to shift.
        lag: number of rows to shift ``lagged`` by.
        window: size of the rolling correlation window.

    Returns:
        None. The function only plots and prints.
    """
    lag_col = "{}_lag".format(lagged)

    # Work on a copy: adding the lag column to ``df`` itself would leak a
    # new column into the caller's frame.
    aligned = df.copy()
    aligned[lag_col] = aligned[lagged].shift(lag)
    # Shifting leaves NaN where no lagged counterpart exists; drop those rows.
    aligned = aligned.dropna()

    # ``.iloc`` replaces the ``.ix`` indexer, which newer pandas removed.
    rolling_corr = aligned.iloc[:, 0].rolling(window=window).corr(aligned[lag_col])
    # Rows without a full window have no correlation value (NaN); drop them.
    dflag = aligned.assign(correlation=rolling_corr).dropna()

    # np.histogram returns the per-bin counts and the bin edges.
    count, division = np.histogram(dflag["correlation"])
    # argmax picks the fullest bin; indexing the edges with it yields that
    # bin's left edge.
    most_occuring_value = division[count.argmax()]

    # Visualize the distribution and mark the most populated bin.
    dflag.hist(column="correlation")
    plt.title("Correlation Histogram")
    plt.axvline(most_occuring_value, color="r", linestyle="dashed", linewidth=2)
    plt.show()
    # Parenthesized form is valid on both Python 2 and Python 3.
    print("Most re-occuring Corr value = %f" % most_occuring_value)

    # Normalize on the NaN-free frame: the first row of the raw lag column
    # is NaN for a positive lag, and dividing by it would blank the series.
    df_normalized = aligned[[s for s in aligned.columns if s != lagged]]
    # Dividing by the first row rebases every series to 1 for comparison.
    df_normalized = df_normalized / df_normalized.iloc[0]
    df_normalized.plot()
    plt.title("Normalized Adj Closing Prices")
    plt.show()
# Example usage (downloads data and shows two plots):
# df = get_adj_prices(["PEP","KO"], "2011-01-01")
# find_corr(df,"KO",1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment