smc77 / cs229_univariate_regression
Created October 3, 2011 00:28
Univariate regression with housing data
# Univariate linear regression on the housing data.
# NOTE(review): the path given to read.table() is empty in this copy and the
# dataset reference that should follow "available" is missing -- restore both.
# Keep columns 6 and 14 of the table and name them num.rooms / median.values.
housing <- setNames(
  read.table("")[, c(6, 14)],
  c("num.rooms", "median.values")
)
# Regress median.values on num.rooms
housing.lm <- lm(median.values ~ num.rooms, data=housing)
smc77 / intuitive_regression
Created October 5, 2011 02:01
Fitting various random lines to the housing data to get an intuition about the loss function.
# Overlay two arbitrarily chosen candidate lines on the current plot
abline(a=0, b=5, col="red")      # intercept 0, slope 5
abline(a=-50, b=10, col="blue")  # intercept -50, slope 10
# Halved squared-error loss of the line `intercept + slope * num.rooms`
# measured against the observed `median.values`.
#   intercept, slope: coefficients of the candidate line
#   data: table with numeric columns "num.rooms" and "median.values";
#         defaults to the global `housing`, so existing two-argument calls
#         behave exactly as before
# Returns a single number: sum(residual^2) / 2.
loss <- function(intercept, slope, data=housing) {
  predicted <- intercept + (slope * data[, "num.rooms"])
  residuals <- predicted - data[, "median.values"]
  sum(residuals^2) / 2
}
# Evaluate the loss for the line with intercept 0 and slope 5
loss(intercept=0, slope=5)
smc77 / linear algebra in R
Created October 20, 2011 01:32
Quick linear algebra demo
# Matrix addition: element-wise sum of two 3x2 matrices
cbind(c(1, 2, 3), c(0, 5, 1)) + cbind(c(4, 2, 0), c(0.5, 5, 1))
# Scalar multiplication: every entry of the 3x2 matrix is multiplied by 3
3 * cbind(c(1, 2, 3), c(0, 5, 1))
# Matrix-vector multiplication: 3x2 matrix times a length-2 vector
cbind(c(1, 4, 2), c(3, 0, 1)) %*% c(1, 5)
# Matrix-Matrix Multiplication
smc77 / multivariate
Created October 22, 2011 23:44
Multivariate Regression
# Load the full housing table and label its 14 columns.
# NOTE(review): the read.table() path is empty in this copy and the dataset
# reference is missing -- restore both before running.
housing <- read.table("")
colnames(housing) <- c(
  "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
  "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"
)
# Keep only the columns used by the model
housing <- housing[c("CRIM", "RM", "PTRATIO", "LSTAT", "MEDV")]
smc77 / multivariate_grad_descent.R
Created October 23, 2011 21:32
Multivariate Gradient Descent
# Read the input table and set up the values used below.
# NOTE(review): the read.csv() path is empty in this copy.
data <- read.csv("")
num.iterations <- 1000
# Feature columns
x <- data[c("area", "bedrooms")]
# Target as a one-column matrix, scaled so that numbers are in $1000's
y <- matrix(data$price / 1000, ncol=1)
# Function to standardize input values
zscore <- function(x, mean.val=NA) {
smc77 / normal_equation.R
Created October 24, 2011 00:11
Normal Equation
# Closed-form least-squares fit via the normal equation.
# NOTE(review): `m` and `y` are not defined in this excerpt (presumably the
# row count and the response, set up elsewhere -- confirm), and the
# read.csv() path is empty in this copy.
data <- read.csv("")
# Design matrix: a leading column of ones for the intercept plus the features
x <- as.matrix(cbind(intercept=rep(1, m), data[, c("area", "bedrooms")]))
# theta = (X'X)^-1 X'y, obtained by solving (X'X) theta = X'y directly rather
# than forming the explicit inverse, which is cheaper and numerically safer
theta <- solve(crossprod(x), crossprod(x, y))
smc77 / logistic_regression.R
Created October 26, 2011 01:45
Logistic Regression
# Line plot of the sigmoid 1 / (1 + e^-z) at the integers z = -10, ..., 10
qplot(x=-10:10, y=1/(1 + exp(10:-10)), geom="line", xlab="z", ylab="sigmoid function")
# Load the South African heart disease data: comma-separated, with a header
# row, using the first column as row names.
# NOTE(review): the source path is empty in this copy -- restore it.
# `header=TRUE` is spelled out in full: `head` relied on partial argument
# matching and `T` is an ordinary variable that can be reassigned.
sa.heart <- read.table("", sep=",", header=TRUE, row.names=1)
# Pretty plot
smc77 / logistic_regression_multi.R
Created October 28, 2011 03:28
Multiclass Logistic Regression
# Scatterplot matrix of the first four columns of iris; each point's fill
# colour is picked by the integer code of its Species level
pairs(
  iris[, 1:4],
  main="Anderson's Iris Data -- 3 species",
  pch=21,
  bg=c("red", "green3", "blue")[as.integer(iris$Species)]
)
# Use linear discriminant analysis: model Species from every other column of
# iris and keep the fitted object in iris.lda.
# NOTE(review): lda() is not a base R function -- presumably MASS::lda,
# attached elsewhere in the original script; confirm.
iris.lda <- lda(Species ~ ., data = iris)
# Use a multinomial logistic regression model: Species regressed on every
# other column of iris (`Species ~ .`; the right-hand side was empty before,
# which does not parse).
# The fit uses vglm() because the `multinomial` family belongs to the VGAM
# package and is not accepted by base glm().
# NOTE(review): requires VGAM to be attached (library(VGAM)) before this line.
iris.vglm <- vglm(Species ~ ., family=multinomial, data=iris)
smc77 / overfitting.R
Created November 4, 2011 02:05
# Number of sample points
n <- 10
# Underlying curve: sin(2 * pi * x)
f <- function(x) {
  sin(2 * pi * x)
}
# n evenly spaced inputs from 0 to 1
x <- seq(from=0, to=1, length.out=n)
# Observations: the curve plus Gaussian noise with standard deviation 0.2
y <- rnorm(n, sd=0.2) + f(x)
# Scatter plot of the noisy sample
plot(data.frame(x=x, y=y))
smc77 / polynomial_generalization.R
Created November 9, 2011 03:22
# Let's look at how the different models generalize between different datasets
# NOTE(review): the next line looks truncated -- the variable name that should
# precede `<- 10` is missing; restore it from the original script.
# <- 10
# NOTE(review): n.test is presumably the size of the test set -- confirm
# against its use further down.
n.test <- 100
# Half the sum of squared differences between predictions and targets.
#   y: observed values
#   y.pred: predicted values
# Returns a single number.
error.function <- function(y, y.pred) {
  residuals <- y.pred - y
  sum(residuals^2) / 2
}
# Root-mean-square error: sqrt(2 * E / N), where E is the value returned by
# error.function() and N is the number of observations in y.
#   y: observed values
#   y.pred: predicted values
e.rms <- function(y, y.pred) {
  half.sse <- error.function(y=y, y.pred=y.pred)
  sqrt(2 * half.sse / length(y))
}