Skip to content

Instantly share code, notes, and snippets.

@tradingbills
Created July 20, 2020 18:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tradingbills/8f81e1fa54a5d750b0ff8ad98fe85f75 to your computer and use it in GitHub Desktop.
Save tradingbills/8f81e1fa54a5d750b0ff8ad98fe85f75 to your computer and use it in GitHub Desktop.
library(readxl)
library(tidyverse)
library(psych)
library(scales)
# Make the data folder the working directory: the read_excel() calls in this
# script pass bare file names, which are resolved against this directory.
# NOTE(review): hard-coded, machine-specific path -- adjust before running
# on another machine.
setwd("C:/Users/tradingbills/Documents/_exer/_data/math/wk4/")
# 1 compute the covariance
# COV = Sum( (x_i - x_bar) * (y_i - y_bar) ) / (N - 1)
# COVARIANCE
#' Sample covariance of two numeric vectors
#'
#' Computes sum((x - mean(x)) * (y - mean(y))) / (n - 1), where n is the
#' number of (x, y) pairs used.
#'
#' @param x Numeric vector.
#' @param y Numeric vector of the same length as `x`.
#' @param na.rm If `TRUE`, every pair in which either value is `NA` is
#'   dropped before computing. Defaults to `FALSE`, in which case any `NA`
#'   propagates to the result.
#' @return A single number. With exactly one pair the result is `NaN`
#'   (0 / 0).
covariance <- function(x, y, na.rm = FALSE) {
  # Fail loudly instead of letting R silently recycle the shorter vector,
  # which would produce a number that is not a covariance of paired data.
  if (length(x) != length(y)) {
    stop("`x` and `y` must have the same length", call. = FALSE)
  }
  if (na.rm) {
    # Drop pairs, not individual values, so x and y stay aligned.
    keep <- !(is.na(x) | is.na(y))
    x <- x[keep]
    y <- y[keep]
  }
  numerator <- sum((x - mean(x)) * (y - mean(y)))
  denominator <- length(x) - 1
  numerator / denominator
}
# Cereals: relationship between the Sugar and Calories columns.
cereal <- read_excel("Cereals.xlsx")
x <- cereal$Sugar
y <- cereal$Calories

# COVARIANCE -- computed once with the helper above, stored, then echoed.
covar_of_cereal <- covariance(x, y)
covar_of_cereal

cereal %>%
  ggplot(aes(Sugar, Calories)) +
  theme_bw() +
  geom_point()

# r (Pearson coefficient of correlation): the covariance scaled by the
# product of the two standard deviations.
r_of_cereal <- covar_of_cereal / (sd(x) * sd(y))
cor(x, y)
# Check the hand-computed r against cor() with a numeric tolerance; comparing
# rounded values with == can report FALSE for values that agree to many
# digits but straddle a rounding boundary.
isTRUE(all.equal(r_of_cereal, cor(x, y))) # [1] TRUE

# Rank-based (Spearman) correlation for comparison.
cor(x, y, method = "spearman")

# r squared.
r_sqrd <- cor(x, y)^2 # [1] 0.8561823

# Linear model of y (Calories) on x (Sugar).
fit <- lm(y ~ x)
summary(fit)
#2 College football: Total Pay versus Football Net Revenue.
football <- read_excel("College Football.xlsx")
glimpse(football)

# Keep only the two columns under study and give them syntactic names.
ds02 <- football %>%
  select(`Total Pay`, `Football Net Revenue`) %>%
  rename(
    ttl_pay = `Total Pay`,
    revenue = `Football Net Revenue`
  )

ds02 %>%
  ggplot(aes(revenue, ttl_pay)) +
  theme_bw() +
  geom_point()

x <- ds02$revenue
y <- ds02$ttl_pay

# COVARIANCE -- reuse the covariance() helper defined earlier in this script
# rather than repeating its arithmetic inline.
covar_of_football <- covariance(x, y)
r_of_football <- covar_of_football / (sd(x) * sd(y))
cor(x, y)
cor(x, y, method = "spearman")
# Tolerance-based check of the hand-computed r against cor().
isTRUE(all.equal(r_of_football, cor(x, y))) # [1] TRUE

# r squared for the football data (x and y above), not the cereal workbook.
r_sqrd <- cor(x, y)^2
#3 HDL_cholesterol: Cholesterol versus Age.
hdl <- read_excel("HDL_cholesterol.xlsx")

hdl %>%
  ggplot(aes(x = Age, y = Cholesterol)) +
  theme_bw() +
  geom_point()

x <- hdl$Age
y <- hdl$Cholesterol

# COVARIANCE -- reuse the covariance() helper defined earlier in this script
# rather than repeating its arithmetic inline.
covar_of_hdl <- covariance(x, y)
r_of_hdl <- covar_of_hdl / (sd(x) * sd(y))
cor(x, y)
cor(x, y, method = "spearman")
# Tolerance-based check of the hand-computed r against cor().
isTRUE(all.equal(r_of_hdl, cor(x, y))) # [1] TRUE
#4 MRI
# Explanatory variable: MRI; response variable: IQ.
mri <- read_excel("MRI_IQ.xlsx")
#' Powers of ten spanning the range of `x`
#'
#' @param x Numeric vector of strictly positive values (its minimum and
#'   maximum are passed to `log10()`).
#' @return Numeric vector 10^low, ..., 10^high, where
#'   low = floor(log10(min(x))) and high = ceiling(log10(max(x))).
breaks_log10 <- function(x) {
  # Both bounds are taken in base 10 so the breaks bracket the data; a lower
  # bound taken in any larger-exponent base (e.g. 8) can start above min(x).
  low <- floor(log10(min(x)))
  high <- ceiling(log10(max(x)))
  10^(seq.int(low, high))
}
# Scatter plot of IQ against MRI_COUNT, with the point shape mapped to GENDER.
mri %>%
  ggplot(aes(x = MRI_COUNT, y = IQ, shape = GENDER)) +
  geom_point() +
  scale_shape_manual(values = c(24, 16))

# Correlation between MRI_COUNT and IQ within each GENDER group.
female <- filter(mri, GENDER == "F")
cor(female$MRI_COUNT, female$IQ)

male <- filter(mri, GENDER == "M")
cor(male$MRI_COUNT, male$IQ)
#5 Baseball
library(lsr)
baseball <- read_excel("Baseball P14.xlsx")

# Correlations via lsr::correlate() on the whole table ...
correlate(baseball)

# ... and via cor() on the numeric columns only.
cor(Filter(is.numeric, baseball))
#8 Judging: correlation between the two judges' score columns.
judging <- read_excel("Judging.xlsx")

# Spearman first, then Pearson.
with(judging, cor(Judge_1_Score, Judge_2_Score, method = "spearman"))
with(judging, cor(Judge_1_Score, Judge_2_Score, method = "pearson"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment