Skip to content

Instantly share code, notes, and snippets.

@mick001
Last active April 15, 2023 09:17
Show Gist options
  • Star 14 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save mick001/620195a84212c6afea1c to your computer and use it in GitHub Desktop.
Save mick001/620195a84212c6afea1c to your computer and use it in GitHub Desktop.
Modelling dependence with copulas. Full article at: http://datascienceplus.com/modelling-dependence-with-copulas/
#Load library mass and set seed
library(MASS)
set.seed(100)
# We are going to use 3 random variables
m <- 3
# Number of samples to be drawn
n <- 2000
# Covariance matrix
sigma <- matrix(c(1, 0.4, 0.2,
0.4, 1, -0.8,
0.2, -0.8, 1),
nrow=3)
# Random samples
z <- mvrnorm(n,mu=rep(0, m),Sigma=sigma,empirical=T)
# Pairplot library
library(psych)
# Correlation calculation
cor(z,method='spearman')
# Pairplot
pairs.panels(z)
# Transform samples into uniform data within [0,1]
u <- pnorm(z)
pairs.panels(u)
# 3D plot library
library(rgl)
# 3D plot
plot3d(u[,1],u[,2],u[,3],pch=20,col='navyblue')
# Apply the inverse of the selected marginals
x1 <- qgamma(u[,1],shape=2,scale=1)
x2 <- qbeta(u[,2],2,2)
x3 <- qt(u[,3],df=5)
#3D plot
plot3d(x1,x2,x3,pch=20,col='blue')
# Check correlation
df <- cbind(x1,x2,x3)
pairs.panels(df)
cor(df,meth='spearman')
# Shorten the steps using the copula package
library(copula)
set.seed(100)
myCop <- normalCopula(param=c(0.4,0.2,-0.8), dim = 3, dispstr = "un")
myMvd <- mvdc(copula=myCop, margins=c("gamma", "beta", "t"),
paramMargins=list(list(shape=2, scale=1),
list(shape1=2, shape2=2),
list(df=5)) )
# Generate random samples
Z2 <- rmvdc(myMvd, 2000)
colnames(Z2) <- c("x1", "x2", "x3")
pairs.panels(Z2)
#------------------------------------------------------------------------------------------------------------------------------
# Example with real data
# Load data
cree <- read.csv('cree_r.csv',header=F)$V2
yahoo <- read.csv('yahoo_r.csv',header=F)$V2
# Plot and correlation checks
plot(cree,yahoo,pch='.')
abline(lm(yahoo~cree),col='red',lwd=1)
cor(cree,yahoo,method='spearman')
# Select the copula to be used
library(VineCopula)
u <- pobs(as.matrix(cbind(cree,yahoo)))[,1]
v <- pobs(as.matrix(cbind(cree,yahoo)))[,2]
selectedCopula <- BiCopSelect(u,v,familyset=NA)
selectedCopula
# Fit a t-copula
t.cop <- tCopula(dim=2)
set.seed(500)
m <- pobs(as.matrix(cbind(cree,yahoo)))
fit <- fitCopula(t.cop,m,method='ml')
coef(fit)
# Set the parameters
rho <- coef(fit)[1]
df <- coef(fit)[2]
# Plot the density
persp(tCopula(dim=2,rho,df=df),dCopula)
# Sample random observation from the copula
u <- rCopula(3965,tCopula(dim=2,rho,df=df))
plot(u[,1],u[,2],pch='.',col='blue')
cor(u,method='spearman')
# Estimate marginals parameters
cree_mu <- mean(cree)
cree_sd <- sd(cree)
yahoo_mu <- mean(yahoo)
yahoo_sd <- sd(yahoo)
# Plot the histograms of the marginals and the fitted lines
hist(cree,breaks=80,main='Cree returns',freq=F,density=30,col='cyan',ylim=c(0,20),xlim=c(-0.2,0.3))
lines(seq(-0.5,0.5,0.01),dnorm(seq(-0.5,0.5,0.01),cree_mu,cree_sd),col='red',lwd=2)
legend('topright',c('Fitted normal'),col=c('red'),lwd=2)
hist(yahoo,breaks=80,main='Yahoo returns',density=30,col='cyan',freq=F,ylim=c(0,20),xlim=c(-0.2,0.2))
lines(seq(-0.5,0.5,0.01),dnorm(seq(-0.5,0.5,0.01),yahoo_mu,yahoo_sd),col='red',lwd=2)
legend('topright',c('Fitted normal'),col=c('red'),lwd=2)
# Build the distribution from the copula
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=df), margins=c("norm","norm"),
paramMargins=list(list(mean=cree_mu, sd=cree_sd),
list(mean=yahoo_mu, sd=yahoo_sd)))
# Sample from the distribution
sim <- rmvdc(copula_dist, 3965)
# Compare observed and simulated values
plot(cree,yahoo,main='Returns')
points(sim[,1],sim[,2],col='red')
legend('bottomright',c('Observed','Simulated'),col=c('black','red'),pch=21)
# Take a look at what would have happened for df=1 and df=8
set.seed(4258)
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=1), margins=c("norm","norm"),
paramMargins=list(list(mean=cree_mu, sd=cree_sd),
list(mean=yahoo_mu, sd=yahoo_sd)))
sim <- rmvdc(copula_dist, 3965)
plot(cree,yahoo,main='Returns')
points(sim[,1],sim[,2],col='red')
legend('bottomright',c('Observed','Simulated df=1'),col=c('black','red'),pch=21)
cor(sim[,1],sim[,2],method='spearman')
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=8), margins=c("norm","norm"),
paramMargins=list(list(mean=cree_mu, sd=cree_sd),
list(mean=yahoo_mu, sd=yahoo_sd)))
sim <- rmvdc(copula_dist, 3965)
plot(cree,yahoo,main='Returns')
points(sim[,1],sim[,2],col='red')
legend('bottomright',c('Observed','Simulated df=8'),col=c('black','red'),pch=21)
cor(sim[,1],sim[,2],method='spearman')
@sri-ram07
Copy link

Could you please help me to write the algorithm for how to generate data from a copula model, for example Clayton copula

@markencoder
Copy link

Could you please help me to write the algorithm for how to generate data from a copula model, for example, Clayton copula
I think you can refer to the code's line 118,131,140, which is the generate date from the copula model, I think all you need to do is substitute the t-copula into Clayton copula

@GOUD05
Copy link

GOUD05 commented Jan 4, 2021

Could you please help me to do the code of the dependence structure of copula vary with time (dynamic copula)?

@MatthieuStigler
Copy link

code is slighlty outdated as of 2023 with copula 1.1.2:

Z2 <- rmvdc(myMvd, 2000)
Error: 'rmvdc' is defunct.
Use 'rMvdc' instead.

New code should apparently read:

Z2 <- rMvdc(2000, myMvd)

thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment