Skip to content

Instantly share code, notes, and snippets.

@mick001
Created August 29, 2015 11:28
Show Gist options
  • Save mick001/84f824d5be390f3f3aee to your computer and use it in GitHub Desktop.
Save mick001/84f824d5be390f3f3aee to your computer and use it in GitHub Desktop.
CopulaClass a Python class for using copulas: a fitting example. Full article at http://www.firsttimeprogrammer.blogspot.com/2015/02/copulaclass-python-class-for-using.html
# Copula class
import numpy as np
import matplotlib.pyplot as plt
from copulalib.copulalib import Copula
from scipy.stats import norm
plt.style.use('ggplot')
class copulaClass(object):
# Available copulas
families = ['frank','clayton','gumbel']
def __init__(self,x,y):
# Information about the data
self.x = x
self.y = y
self.mu_x = np.array(x).mean()
self.mu_y = np.array(y).mean()
self.std_x = np.array(x).std()
self.std_y = np.array(y).std()
# Information about the copula
self.cop = 0
self.famil = 0
self.tau_ = 0
self.sr_ = 0
self.theta_ = 0
def showAvailableCopulas(self):
"""This function plots available copulas
to give you a visual insight """
# Random simulated data
x = np.random.normal(size=250)
y = 2.5*x + np.random.normal(size=250)
fig = plt.figure()
# Frank
frank = Copula(x,y,family='frank')
uf,vf = frank.generate_uv(1000)
fig.add_subplot(2,2,1)
plt.scatter(uf,vf,marker='.',color='blue')
plt.ylim(0,1)
plt.xlim(0,1)
plt.title('Frank copula')
# Clayton
clayton = Copula(x,y,family='clayton')
uc,vc = clayton.generate_uv(1000)
fig.add_subplot(2,2,2)
plt.scatter(uc,vc,marker='.',color='red')
plt.ylim(0,1)
plt.xlim(0,1)
plt.title('Clayton copula')
# Gumbel
gumbel = Copula(x,y,family='gumbel')
ug,vg = gumbel.generate_uv(1000)
fig.add_subplot(2,2,3)
plt.scatter(ug,vg,marker='.',color='green')
plt.ylim(0,1)
plt.xlim(0,1)
plt.title('Gumbel copula')
plt.show()
def plotData(self):
"""This function plots the data you've fed in
to give you a visual insight of the correlation
structure and the marginal distributions """
x = self.x
y = self.y
fig = plt.figure()
fig.add_subplot(2,2,1)
plt.hist(x,bins=20,color='green',alpha=0.8,align='mid')
plt.title('X variable distribution')
fig.add_subplot(2,2,3)
plt.scatter(x,y,marker="o",alpha=0.8)
fig.add_subplot(2,2,4)
plt.title('Joint X,Y')
plt.hist(y,bins=20,orientation='horizontal',color='red',alpha=0.8,align='mid')
plt.title('Y variable distribution')
plt.show()
def generateCopula(self,fam,plot=False):
"""Generate the copula and optionally plot it"""
if fam.lower() not in self.families:
raise ValueError('Please select a valid family name')
# Copula generation
self.famil = fam.lower()
c = Copula(self.x,self.y,family=fam.lower())
self.cop = c
# Parameters are estimated and set
self.tau_ = c.tau
self.sr_ = c.sr
self.theta_ = c.theta
if plot:
u,v = c.generate_uv(1000)
plt.scatter(u,v,marker='.',color='red')
plt.xlim(0,1)
plt.ylim(0,1)
plt.title(fam.lower().capitalize()+' Copula 1000 pseudo observations')
plt.show()
def printCorrelation(self):
# Print details about correlations and parameters
print("#################################################")
print("Correlation details:")
print("Correlation index range: [-1,1] [negative,positive]")
print("Kendall's tau:",self.tau_)
print("Spearman's rho:",self.sr_)
print("Parameter of the copula (theta):",self.theta_)
print("#################################################")
def generatePseudoObs(self,n=1000,plot=False):
"""This function generates and returns simulated pseudo observations """
if self.famil == 0:
raise ValueError('Generate copula first')
u,v = self.cop.generate_uv(n)
if plot:
plt.scatter(u,v,marker='.',color='red')
plt.xlim(0,1)
plt.ylim(0,1)
plt.title(self.famil.capitalize()+' Copula 1000 pseudo observations')
plt.show()
return u,v
def getSimulatedData(self,dist='normal',n=1000):
"""This function simulates real observations assuming that your data
is normally distributed. Optionally you can edit this function and
choose the distribution that fits your data best"""
if dist.lower() == 'normal':
u,v = self.generatePseudoObs(n=n)
x = norm.ppf(u,loc=self.mu_x,scale=self.std_x)
y = norm.ppf(v,loc=self.mu_y,scale=self.std_y)
return x,y
#------------------------------Run the program---------------------------------
# Simulate some data
x = np.random.normal(loc=0,scale=0.4,size=250)
y = 2.5*x + np.random.normal(size=250)
# Genereate a copulaClass instance
a = copulaClass(x,y)
# Visualize your data
a.plotData()
# Should you want to check available copulas run the line below
# a.showAvailableCopulas()
# Fit the copula (say a Frank copula)
a.generateCopula('frank',plot=False)
# Plot pseudo observations
a.generatePseudoObs(plot=True)
# Print parameters
a.printCorrelation()
# Simulate data and plot real observations versus simulated obs.
c,d = a.getSimulatedData()
plt.scatter(c,d,color="red",label="Simulated data",marker='.')
plt.scatter(x,y,color="blue",label="Real data",marker='.')
plt.legend()
plt.title("Fitted Frank copula: simulated data versus real data")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment