Created
July 9, 2020 14:58
-
-
Save tradingbills/0452828185ba817ffc79ebf479e54811 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# lm -- -- -- -- -- -- -- -- -- -- 604 Linear Regression
# from Statistics 101: Linear Regression, Residual Analysis
# youtube.com/watch?v=gLENW2AdJWg

# Six paired observations: the meal bill and the tip left on it.
bill <- c(34, 108, 64, 88, 99, 51)
mean(bill)
tip <- c(5, 17, 11, 8, 14, 5)
mean(tip)

# Correlation coefficient = covariance / co-standard-deviation,
# where the co-standard-deviation is sd(x) * sd(y).
sample_bill_to_tip <- length(bill)
# Sample covariance: summed cross-products of deviations over (n - 1).
covariance_bill_to_tip <-
  sum((bill - mean(bill)) * (tip - mean(tip))) / (sample_bill_to_tip - 1) # 123
co_standard_deviation <- sd(bill) * sd(tip) # 142.0873
r_correlation_coefficient <-
  covariance_bill_to_tip / co_standard_deviation # 0.865665

# Prove the above against the built-in cor(). The two values are doubles
# reached by different arithmetic, so compare with a tolerance; `==` may be
# FALSE even when they agree to machine precision.
isTRUE(all.equal(cor(bill, tip), r_correlation_coefficient))

# Coefficient of determination (r squared) from the correlation coefficient.
r_sqrd_bill_to_tip <- r_correlation_coefficient^2
# or r^2 is SSR/SST, requires a linear model from #604 - - - - - - - - - -
# This section fits tip ~ bill, plots the fit, then builds the three sums of
# squares (SSE, SST, SSR) and checks them once all of them exist.

# ggplot(), aes() and the geom_*() layers below come from ggplot2.
library(ggplot2)

meal_to_tip <- data.frame(bill, tip)
meal_model <- lm(tip ~ bill, data = meal_to_tip)
summary(meal_model)
# Base-R diagnostic plots for the fitted model.
plot(meal_model)

ggplot(meal_to_tip, aes(x = bill, y = tip)) +
  # observed in diamonds from 597(R-Graphics Cookbook)::Sect5-6
  geom_point(col = "orange", shape = 18, size = 2.5) +
  # predicted linear model
  geom_smooth(method = "lm", se = FALSE, color = "lightgrey") +
  # view predicted values: predict(meal_model)
  geom_point(aes(y = predict(meal_model)), col = "purple", size = 2.5) +
  # y_bar is the overall observed mean of y
  geom_line(aes(y = mean(tip)), linetype = "dashed")

# SSE: (observed y - predicted y) squared, summed
se_meal_model <- tip - predict(meal_model)
SSE_meal_model <- sum((tip - predict(meal_model))^2)
# The stepwise residuals above give the same total as the one-line form.
isTRUE(all.equal(sum(se_meal_model^2), SSE_meal_model)) # TRUE

# SST: (observed y - mean of observed y) squared, summed
st_meal_model <- tip - mean(tip)
SST_meal_model <- sum(st_meal_model^2)

# SSR: (predicted y - mean of observed y) squared, summed
sr_meal_model <- predict(meal_model) - mean(tip)
SSR_meal_model <- sum(sr_meal_model^2)

# Decomposition check, SST = SSR + SSE. It runs only after all three sums
# exist, and uses a tolerance because the operands are doubles.
isTRUE(all.equal(SST_meal_model, SSR_meal_model + SSE_meal_model))

# r^2 as SSR / SST
SSR_meal_model / SST_meal_model
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment