Created
July 20, 2020 18:17
-
-
Save tradingbills/8f81e1fa54a5d750b0ff8ad98fe85f75 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readxl) | |
library(tidyverse) | |
library(psych) | |
library(scales) | |
# NOTE(review): hard-coded, machine-specific working directory. The read_excel()
# calls later in this script pass bare file names, so they resolve against it.
setwd("C:/Users/tradingbills/Documents/_exer/_data/math/wk4/") | |
# 1 compute the covariance
# COV = sum((x_i - x_bar) * (y_i - y_bar)) / (N - 1)
# COVARIANCE
#
# Sample covariance of two paired vectors.
#   x, y : vectors of paired observations; must have the same length.
# Returns a single number: the sum of the products of the deviations from
# each mean, divided by (length(x) - 1).
covariance <- function(x, y) {
  # Without this check R would silently recycle the shorter vector and
  # return a number that is not the covariance of any paired sample.
  if (length(x) != length(y)) {
    stop("`x` and `y` must have the same length.", call. = FALSE)
  }
  deviation_products <- (x - mean(x)) * (y - mean(y))
  sum(deviation_products) / (length(x) - 1)
}
# Exercise 1 data: Sugar (x) against Calories (y) from the cereals workbook.
cereal <- read_excel(path = "Cereals.xlsx")
x <- cereal[["Sugar"]]
y <- cereal[["Calories"]]
covariance(x, y)

# Scatter plot of the two variables.
ggplot(cereal, aes(Sugar, Calories)) +
  theme_bw() +
  geom_point()

# COVARIANCE
covar_of_cereal <- covariance(x, y)

# Pearson's r: covariance scaled by the product of the standard deviations.
r_of_cereal <- covar_of_cereal / (sd(x) * sd(y))
cor(x, y)

# Cross-check the hand-computed r against cor() at 4 decimal places.
round(r_of_cereal, 4) == round(cor(x, y), 4) # [1] TRUE
cor(x, y, method = "spearman")

# Squared Pearson correlation.
r_sqrd <- cor(x, y)^2 # [1] 0.8561823

# Simple linear regression of y on x.
fit <- lm(y ~ x)
summary(fit)
# 2 College football: Total Pay against Football Net Revenue
football <- read_excel("College Football.xlsx")
glimpse(football)

# Keep only the two columns under study and give them syntactic names.
ds02 <- football %>%
  select(`Total Pay`, `Football Net Revenue`) %>%
  rename(
    ttl_pay = `Total Pay`,
    revenue = `Football Net Revenue`
  )

ds02 %>%
  ggplot(aes(revenue, ttl_pay)) +
  theme_bw() +
  geom_point()

x <- ds02$revenue
y <- ds02$ttl_pay

# COVARIANCE (same helper as exercise 1 instead of repeating the formula)
covar_of_football <- covariance(x, y)

# Pearson's r: covariance scaled by the product of the standard deviations.
r_of_football <- covar_of_football / (sd(x) * sd(y))
cor(x, y)
cor(x, y, method = "spearman")

# Cross-check the hand-computed r against cor() at 4 decimal places.
round(r_of_football, digits = 4) == round(cor(x, y), digits = 4) # [1] TRUE

# Squared Pearson correlation of the football vectors assigned above
# (`cereal` has no `x` / `y` columns, so it cannot be the source here).
r_sqrd <- cor(x, y)^2
# 3 HDL_cholesterol: Age against Cholesterol
hdl <- read_excel("HDL_cholesterol.xlsx")

hdl %>%
  ggplot(aes(x = Age, y = Cholesterol)) +
  theme_bw() +
  geom_point()

x <- hdl$Age
y <- hdl$Cholesterol

# COVARIANCE (same helper as exercise 1 instead of repeating the formula)
covar_of_hdl <- covariance(x, y)

# Pearson's r: covariance scaled by the product of the standard deviations.
r_of_hdl <- covar_of_hdl / (sd(x) * sd(y))
cor(x, y)
cor(x, y, method = "spearman")

# Cross-check the hand-computed r against cor() at 4 decimal places.
round(r_of_hdl, digits = 4) == round(cor(x, y), digits = 4) # [1] TRUE
# 4 MRI
# Load the MRI / IQ workbook used by the plot and correlations that follow.
mri <- read_excel(path = "MRI_IQ.xlsx")
# mri as explanatory and iq as response
# Power-of-ten break points that bracket the range of `x`.
#   x : numeric vector of positive values (log10 is not finite otherwise).
# Returns 10^k for every integer k from floor(log10(min(x))) up to
# ceiling(log10(max(x))).
breaks_log10 <- function(x) {
  # Both exponents must be taken in base 10: the breaks are powers of 10, so
  # any other base for the lower bound can place the first break above min(x).
  low <- floor(log10(min(x)))
  high <- ceiling(log10(max(x)))
  10^seq.int(low, high)
}
# Scatter of IQ against MRI_COUNT, with point shape keyed to GENDER
# (manual shape codes 24 and 16).
ggplot(mri, aes(x = MRI_COUNT, y = IQ, shape = GENDER)) +
  geom_point() +
  scale_shape_manual(values = c(24, 16))

# Pearson correlation of MRI_COUNT and IQ within each GENDER subset.
female <- filter(mri, GENDER == "F")
with(female, cor(MRI_COUNT, IQ))

male <- filter(mri, GENDER == "M")
with(male, cor(MRI_COUNT, IQ))
# 5
library(lsr)
baseball <- read_excel(path = "Baseball P14.xlsx")

# lsr's correlate() applied to the whole table.
correlate(baseball)

# Correlation matrix restricted to the numeric columns.
cor(Filter(is.numeric, baseball))
# 8
judging <- read_excel(path = "Judging.xlsx")

# Correlation between the two judges' scores: Spearman first, then Pearson.
with(judging, cor(Judge_1_Score, Judge_2_Score, method = "spearman"))
with(judging, cor(Judge_1_Score, Judge_2_Score, method = "pearson"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment