Last active
August 14, 2021 08:58
-
-
Save ZhangXichu/e1a2da2e91b3c9687706e167de963ae2 to your computer and use it in GitHub Desktop.
Partial correlation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dataH <- heights | |
head(dataH) | |
summary(dataH) | |
# omit all the rows with Na | |
dataH <- na.omit(dataH) | |
summary(dataH) | |
dataH$sex <- as.numeric(dataH$sex) | |
dataH$marital <- as.numeric(dataH$marital) | |
matH <- as.matrix(dataH) | |
# calculate the correlation | |
corM <- rcorr(matH) | |
# this silents the warning while ploting, | |
# otherwise on the diagonal we have Na | |
corM$P[is.na(corM$P)] <- 0 | |
# plot the correlation matrix, with the statistically insignificant | |
# factors crossed out | |
corrplot(corM$r, p.mat=corM$P, method="circle") | |
# build linear model using all the variables | |
lm.ihw <- lm(income ~ ., data = dataH) | |
summary(lm.ihw) | |
# calculate partial correlation | |
corMp <- pcor(matH, method="pearson") | |
# the partial correlation between income and height | |
corrplot(corMp$estimate, p.mat=corMp$p.value, method = "circle") | |
corMp$estimate["height", "income"] | |
# --> 0.01655639 | |
# if we take only the dataframe with those three columns | |
# the result produced by pcor is the same as the result | |
# calculated using linear regression | |
dataSmall <- dataH[c("income", "height", "sex")] | |
matSmall <- as.matrix(dataSmall) | |
corSmall <- pcor(matSmall, method="pearson") | |
# the partial correlation between income and height | |
corSmall$estimate["height", "income"] | |
# --> 0.0937149 | |
cor(dataSmall$income, dataSmall$height, method = "pearson") | |
# --> 0.21765 | |
# calculate partial correlation between income and height | |
# using regression | |
m.i <- lm(income ~ sex, data = dataH) | |
m.s<- lm(height ~ sex, data = dataH) | |
v.i <- summary(m.i) | |
v.s <- summary(m.s) | |
cor(v.i$residuals, v.s$residuals, method="pearson") | |
# --> 0.0937149 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment