Skip to content

Instantly share code, notes, and snippets.

@fditraglia
Created September 23, 2013 15:52
Show Gist options
  • Save fditraglia/6672622 to your computer and use it in GitHub Desktop.
Save fditraglia/6672622 to your computer and use it in GitHub Desktop.
# R Tutorial #3 - Transcript and Suggested Solution Code
# Load the class survey from the course website and keep only the three
# columns used in this tutorial.
survey <- read.csv("http://www.ditraglia.com/econ103/old_survey.csv")
survey <- survey[,c("handedness", "height", "handspan")]
head(survey)
# Correlation between two columns, then the full correlation matrix.
# use = "complete.obs" drops rows with missing values; the matrix version
# drops a row if ANY of the three columns is missing, so its entries need not
# match the pairwise call exactly.
cor(survey$handspan, survey$height, use = "complete.obs")
cor(survey, use = "complete.obs")
# Same two calls for the covariance.
cov(survey$handspan, survey$height, use = "complete.obs")
cov(survey, use = "complete.obs")
# Regression is not symmetric: swapping response and predictor gives a
# different fitted line.
lm(height ~ handspan, data = survey)
lm(handspan ~ height, data = survey)
# Scatterplot of handspan (vertical axis) against height (horizontal axis).
plot(handspan ~ height, data = survey)
# Redraw the scatterplot and overlay the fitted line; abline() accepts the
# length-two coefficient vector as (intercept, slope).
plot(handspan ~ height, data = survey)
abline(coefficients(lm(handspan ~ height, data = survey)))
# Redraw once more and add dashed red lines at the sample means of each
# variable. The least-squares line passes through the point of means of the
# rows lm() actually used; the means below are taken over each column's own
# non-missing values, so the crossing is only approximate if the two columns
# are missing in different rows.
plot(handspan ~ height, data = survey)
abline(coefficients(lm(handspan ~ height, data = survey)))
abline(v = mean(survey$height, na.rm = TRUE), col = "red", lty = 2)
abline(h = mean(survey$handspan, na.rm = TRUE), col = "red", lty = 2)
# Stand-alone illustration of abline(a, b): plot y = x^2 on a grid from 0 to 1
# and overlay the line with intercept 0 and slope 1.
x <- seq(from = 0, to = 1, by = 0.1)
y <- x^2
plot(y ~ x)
abline(a = 0, b = 1)
#Exercise #1 - Regression
# Repeat the regression, scatterplot, fitted line and mean lines with height
# as the response and the handedness column as the predictor.
lm(height ~ handedness, data = survey)
plot(height ~ handedness, data = survey)
abline(coefficients(lm(height ~ handedness, data = survey)))
abline(v = mean(survey$handedness, na.rm = TRUE), col = "red", lty = 2)
abline(h = mean(survey$height, na.rm = TRUE), col = "red", lty = 2)
# Standardize a numeric vector: subtract its mean and divide by its sample
# standard deviation, both computed with missing values ignored. Missing
# entries of `x` stay NA in the result, so the output has the same length as
# the input.
z.score <- function(x) {
  center <- mean(x, na.rm = TRUE)
  spread <- sd(x, na.rm = TRUE)
  (x - center) / spread
}
# Small vector with two missing entries, used to demonstrate NA handling.
foo <- c(1, 3, NA, 3, 0, NA)
# TRUE wherever an element is missing.
is.na(foo)
# Positional indexing: the first two elements.
foo[seq_len(2)]
# Logical indexing: elements are kept where the index is TRUE.
foo[c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE)]
# Using is.na() as the logical index selects exactly the missing entries ...
foo[is.na(foo)]
# ... and negating it flags the observed ones.
!is.na(foo)
foo[!is.na(foo)]
# Overwrite foo with only its observed values.
foo <- foo[!is.na(foo)]
# Sample mean of `x` with missing values discarded: the sum of the observed
# values divided by how many of them there are.
mymean <- function(x) {
  observed <- x[!is.na(x)]
  sum(observed) / length(observed)
}
# Check the hand-written mean against the built-in on the height column;
# both calls ignore missing values.
mean(survey[["height"]], na.rm = TRUE)
mymean(survey[["height"]])
# Sample mean of `x` ignoring missing values, without subsetting `x` first.
#
# sum(x, na.rm = TRUE) skips the NAs in the numerator, so the denominator must
# count only the observed entries as well. Dividing by length(x) would include
# the NAs in the count and bias the result toward zero whenever any value is
# missing.
#
# Returns a single number; NaN (0 / 0) if every entry is NA, matching
# mean(x, na.rm = TRUE).
mymean2 <- function(x) {
  n.observed <- sum(!is.na(x))
  sum(x, na.rm = TRUE) / n.observed
}
# Evaluate the second implementation on the same column so its result can be
# compared with mean() and mymean().
mymean2(survey[["height"]])
# Sample variance of `x` with missing values discarded: the sum of squared
# deviations from the mean (via mymean()) divided by n - 1, where n is the
# number of observed values.
myvar <- function(x) {
  observed <- x[!is.na(x)]
  deviations <- observed - mymean(observed)
  sum(deviations^2) / (length(observed) - 1)
}
# Check the hand-written variance against the built-in on the handspan column.
var(survey[["handspan"]], na.rm = TRUE)
myvar(survey[["handspan"]])
# Exercise #2 - Write a Function to Calculate Skewness
# Skewness of `x` with missing values discarded: the average cubed deviation
# from the mean (divisor n) divided by the cube of the sample standard
# deviation (sd() uses divisor n - 1).
skew <- function(x) {
  observed <- x[!is.na(x)]
  third.moment <- sum((observed - mean(observed))^3) / length(observed)
  third.moment / sd(observed)^3
}
# Skewness of the handedness column.
skew(survey[["handedness"]])
# Return several summary statistics at once by bundling them in a one-row
# data frame with columns `sample.mean` and `std.dev`. Missing values in `x`
# are removed before either statistic is computed.
summary.stats <- function(x) {
  observed <- x[!is.na(x)]
  data.frame(sample.mean = mean(observed), std.dev = sd(observed))
}
# Store the one-row data frame of statistics, print it, then pull out each
# statistic by column name.
results <- summary.stats(survey[["handedness"]])
results
results[["sample.mean"]]
results[["std.dev"]]
# Sample covariance of `x` and `y` using only the positions where both are
# observed: the sum of the products of deviations from each mean, divided by
# n - 1, where n is the number of complete pairs.
mycov <- function(x, y) {
  paired <- !(is.na(x) | is.na(y))
  x.obs <- x[paired]
  y.obs <- y[paired]
  cross.products <- (x.obs - mean(x.obs)) * (y.obs - mean(y.obs))
  sum(cross.products) / (length(x.obs) - 1)
}
# Check the hand-written covariance against the built-in, which needs
# use = "complete.obs" to drop incomplete pairs.
cov(survey[["handspan"]], survey[["handedness"]], use = "complete.obs")
mycov(survey[["handspan"]], survey[["handedness"]])
# Worked example of the complete-pairs mask used inside mycov(): two short
# vectors that are missing in different positions.
x <- c(1, 2, NA, 3)
y <- c(5, NA, 6, 0)
# Element-wise flags for the observed entries of each vector.
!is.na(x)
!is.na(y)
# A position is kept only when neither vector is missing there.
keep <- !(is.na(x) | is.na(y))
keep
# Applying the same mask to both vectors keeps the surviving pairs aligned.
x[keep]
y[keep]
# Exercise #3 - Write a Function to Carry Out Linear Regression
# Simple least-squares regression of `y` on `x`, using only the positions
# where both are observed. The slope is cov(x, y) / var(x) and the intercept
# is mean(y) - slope * mean(x). Returns a one-row data frame with columns
# `a` (intercept) and `b` (slope).
myreg <- function(y, x) {
  paired <- !(is.na(x) | is.na(y))
  x.obs <- x[paired]
  y.obs <- y[paired]
  slope <- cov(x.obs, y.obs) / var(x.obs)
  intercept <- mean(y.obs) - slope * mean(x.obs)
  data.frame(a = intercept, b = slope)
}
# Check the hand-written regression against lm(): the intercept and slope
# printed by lm() should match columns a and b returned by myreg().
lm(height ~ handspan, data = survey)
myreg(y = survey$height, x = survey$handspan)
library(Quandl)
library(zoo)
# Download Apple price data for 2012-01-01 through 2012-12-31 from Quandl,
# requesting a zoo time-series object (type = 'zoo').
# NOTE(review): the 'GOOG/...' dataset codes used in this section may no
# longer be served by the provider -- confirm before relying on this script.
apple.prices <- Quandl('GOOG/NASDAQ_AAPL', start_date = '2012-01-01', end_date = '2012-12-31', type = 'zoo')
head(apple.prices)
# Plot the whole price object, then only the Close column with labels.
plot(apple.prices)
plot(apple.prices$Close, main = 'Daily Closing Prices: Apple Computer - 2012', xlab = 'Date', ylab = 'Price')
# diff() returns successive differences, illustrated on a short vector.
foo <- c(1,2,4,7,11)
diff(foo)
# Log returns: the difference of consecutive log closing prices.
apple.returns <- diff(log(apple.prices$Close))
plot(apple.returns, main = 'Apple Computer - 2012', xlab = 'Date', ylab = 'Log Daily Returns', col = 'blue')
# Mean and standard deviation of the log returns, first as raw values and then
# multiplied by 100 to read them in percentage points.
mean(apple.returns)
sd(apple.returns)
mean(apple.returns) * 100
sd(apple.returns) * 100
# Log returns add up: their sum equals log(last close) - log(first close).
sum(apple.returns)
# Histogram of the returns, followed by mean and median for comparison.
hist(apple.returns)
mean(apple.returns)
median(apple.returns)
#Exercise #4 - Are Apple Returns Skewed?
# Uses the skew() function defined earlier in this file.
skew(apple.returns)
#Exercise #5 - Repeat the Above for IBM Returns
# Same steps as for Apple, applied to the IBM dataset.
ibm.prices <- Quandl("GOOG/NYSE_IBM", start_date = "2012-01-01", end_date = "2012-12-31", type = "zoo")
head(ibm.prices)
plot(ibm.prices)
plot(ibm.prices$Close, main = "Daily Closing Prices: IBM - 2012", xlab = "Date", ylab = "Price")
ibm.returns <- diff(log(ibm.prices$Close))
plot(ibm.returns, main = "IBM - 2012", xlab = "date", ylab = "Log Daily Returns", col = "Blue")
mean(ibm.returns)
sd(ibm.returns)
sum(ibm.returns)
hist(ibm.returns)
mean(ibm.returns)
median(ibm.returns)
skew(ibm.returns)
#Exercise #6 - Correlations between Returns
# Download a third series (dataset code NYSE_BAC), compute its log returns,
# and correlate each pair of return series.
# NOTE(review): cor() pairs observations by position, so this presumably
# relies on all three series covering the same dates -- verify.
boa.prices <- Quandl("GOOG/NYSE_BAC", start_date = "2012-01-01", end_date = "2012-12-31", type = "zoo")
boa.returns <- diff(log(boa.prices$Close))
cor(boa.returns, apple.returns)
cor(apple.returns, ibm.returns)
cor(boa.returns, ibm.returns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment