Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Manual data cleansing and drawing of a probability plot
from scipy.stats import norm
# Build the data for a normal probability (Q-Q) plot of `returns` by hand.
#
# Empirical quantiles are taken as the midpoints between neighbouring order
# statistics, so the sample has to be sorted *before* neighbours are paired.
# Pairing neighbours in the original order and sorting the midpoints afterwards
# only gives the same answer when `returns` is already ascending.
# np.asarray also makes the indexing purely positional, whatever container
# `returns` is (list, ndarray, pandas Series, ...).
ordered_returns = np.sort(np.asarray(returns))
sample_size = len(ordered_returns)

# quantiles[i]: midpoint between the (i+1)-th and (i+2)-th smallest values.
quantiles = ((ordered_returns[:-1] + ordered_returns[1:]) / 2).tolist()
# quantiles_percent[i]: proportion of the data lying below quantiles[i].
# The last observation is skipped, so the proportion never reaches 1.0 and
# norm.ppf never returns +inf.
quantiles_percent = [(rank + 1) / sample_size for rank in range(sample_size - 1)]

# Midpoints of sorted data are already ascending; sorting again is a cheap
# no-op kept so `sorted_quantiles` stays available under its original name.
sorted_quantiles = sorted(quantiles)

# Two column vectors glued side by side: [percent_below, quantile].
qp_array = np.array(quantiles_percent).reshape(-1, 1)
tq_array = np.array(sorted_quantiles).reshape(-1, 1)
qq_df = pd.DataFrame(np.concatenate((qp_array, tq_array), axis=1),
                     columns=['percent_below', 'quantile'])

# Standard-normal quantile for the same cumulative probability (vectorised
# inverse CDF instead of one scalar call per row).
qq_df['theoretical_quantile'] = norm.ppf(qq_df['percent_below'].values)

# Bare expression: presumably displayed as notebook cell output; it has no
# effect when run as a plain script.
qq_df.tail()

# Empirical quantiles against theoretical normal quantiles.
ax = qq_df.plot.scatter(x='theoretical_quantile', y='quantile', label='actual')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment