Skip to content

Instantly share code, notes, and snippets.

@ajdamico
Created May 26, 2023 22:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ajdamico/1de00db041e957099913bab893d0bf08 to your computer and use it in GitHub Desktop.
Save ajdamico/1de00db041e957099913bab893d0bf08 to your computer and use it in GitHub Desktop.
a hands-on introduction taught by william franz lamberti
# support vector machines #

# quakes ships with the datasets package
data(quakes)

# scatterplot matrix of every column, colored by magnitude rounded to the
# nearest whole number
plot(quakes, col = as.factor(round(quakes$mag)))

# work on a copy whose magnitude column is recoded into whole-number classes
this_df <- quakes
this_df[, "mag"] <- as.factor(round(this_df[, "mag"]))

library(e1071)

# linear-kernel fit of the magnitude class on every other column
fit_linear <- svm(mag ~ ., data = this_df, kernel = "linear")

# the model uses four predictors but the plot only has two axes.  plot.svm()
# holds the predictors that are not on an axis at the values given in `slice`
# and falls back to 0 for any numeric column left unspecified.  latitude 0 and
# longitude 0 lie far outside the recorded quakes, so hold both at their
# medians to draw the decision regions where the data actually sits.
plot(
  fit_linear,
  data = this_df,
  stations ~ depth,
  slice = list(lat = median(this_df$lat), long = median(this_df$long))
)

# in-sample predictions and the share of records classified correctly
ypred <- predict(fit_linear, this_df)
mean(ypred == this_df$mag)
# fit the same svm() specification once per kernel.
#
# ...  passed through unchanged to every svm() call (for example a formula
#      and a `data =` argument); `kernel` is supplied here, so callers should
#      not pass it themselves.
#
# returns a named list with elements `linear`, `sigmoid`, `radial`, and
# `polynomial`, each holding the result of the matching svm() call.
myfun <- function(...) {
  # the calls are written out one by one (rather than looped) so that each
  # svm() call carries its kernel as a literal string
  fit_linear <- svm(..., kernel = "linear")
  fit_sigmoid <- svm(..., kernel = "sigmoid")
  fit_radial <- svm(..., kernel = "radial")
  fit_polynomial <- svm(..., kernel = "polynomial")

  list(
    linear = fit_linear,
    sigmoid = fit_sigmoid,
    radial = fit_radial,
    polynomial = fit_polynomial
  )
}
# one fit per kernel, using every column other than mag as a predictor
full_models <- myfun(mag ~ ., data = this_df)

# in-sample predictions from each kernel's fit
full_model_predictions <- lapply(
  full_models,
  function(fit) predict(fit, this_df)
)

# share of records each kernel classifies correctly
full_model_accuracy <- lapply(
  full_model_predictions,
  function(predicted) mean(predicted == this_df[["mag"]])
)

# cross-tabulation of predicted class (rows) against observed class (columns)
full_model_tables <- lapply(
  full_model_predictions,
  function(predicted) table(predict = predicted, this_df[["mag"]])
)
# one fit per kernel, restricted to depth and stations as predictors
partial_models <- myfun(mag ~ depth + stations, data = this_df)

# in-sample predictions from each kernel's fit
partial_model_predictions <- lapply(
  partial_models,
  function(fit) predict(fit, this_df)
)

# share of records each kernel classifies correctly
partial_model_accuracy <- lapply(
  partial_model_predictions,
  function(predicted) mean(predicted == this_df[["mag"]])
)

# cross-tabulation of predicted class (rows) against observed class (columns)
partial_model_tables <- lapply(
  partial_model_predictions,
  function(predicted) table(predict = predicted, this_df[["mag"]])
)
# resampling methods for classification testing & training #
library(e1071)
data(quakes)

# fresh copy with magnitude recoded into whole-number classes
this_df <- quakes
this_df[["mag"]] <- as.factor(round(this_df[["mag"]]))

# reproducible split: 70% of the row numbers drawn without replacement go to
# training, the remaining rows to testing
set.seed(2023)
training_vals <- sample(
  seq_len(nrow(this_df)),
  size = round(nrow(this_df) * 0.7),
  replace = FALSE
)
training_df <- this_df[training_vals, ]
testing_df <- this_df[-training_vals, ]

# radial-kernel fit on the training rows only
fit_radial <- svm(mag ~ ., data = training_df, kernel = "radial")

# training rows: predicted-by-observed table, then share predicted correctly
predicted_train <- predict(fit_radial, newdata = training_df)
table(predicted_train, training_df[["mag"]])
mean(predicted_train == training_df[["mag"]])

# held-out rows: the same two summaries
predicted_test <- predict(fit_radial, newdata = testing_df)
table(predicted_test, testing_df[["mag"]])
mean(predicted_test == testing_df[["mag"]])
# cross-validation #
library(e1071)
data(quakes)

# fresh copy with magnitude recoded into whole-number classes
this_df <- quakes
this_df[, "mag"] <- as.factor(round(this_df[, "mag"]))

# reproducible split: 70% of the rows to training, the rest to testing
set.seed(2023)
training_vals <-
  sample(
    seq(nrow(this_df)),
    round(nrow(this_df) * 0.7),
    replace = FALSE
  )
training_df <- this_df[training_vals, ]
testing_df <- this_df[-training_vals, ]

# ask tune() for ten cross-validation partitions of the training rows
tc <- tune.control(cross = 10)

# search a grid of gamma values for the radial kernel.  the first grid entry
# appears intended to reproduce svm()'s default gamma, which is one over the
# number of predictor columns.  training_df also carries the response `mag`,
# so that column is subtracted from the count before inverting.
tune_out <-
  tune(
    svm,
    mag ~ .,
    data = training_df,
    kernel = "radial",
    ranges = list(gamma = c(1 / (ncol(training_df) - 1), 0.3, 0.5, 1, 2, 5)),
    tunecontrol = tc
  )

# share predicted correctly by the best model found, on the training rows ...
predicted_training <- predict(tune_out$best.model, training_df)
mean(predicted_training == training_df$mag)

# ... and on the held-out rows
predicted_testing <- predict(tune_out$best.model, testing_df)
mean(predicted_testing == testing_df$mag)
# trees #
library(rpart)
data(quakes)
set.seed(2023)

# regression

# magnitude stays numeric here, so the tree predicts it directly
this_df <- quakes

# row numbers of the 70% training share, drawn without replacement
training_records <- sample(
  seq_len(nrow(this_df)),
  size = round(nrow(this_df) * 0.7),
  replace = FALSE
)
training_df <- this_df[training_records, ]
testing_df <- this_df[-training_records, ]

# regression tree of magnitude on every other column
fit_anova <- rpart(mag ~ ., data = training_df, method = "anova")

# training rows: one minus the ratio of the residual sum of squares to the
# total sum of squares around the training mean
training_anova <- predict(fit_anova, newdata = training_df)
rss <- sum((training_df[["mag"]] - training_anova)^2)
tss <- sum((training_df[["mag"]] - mean(training_df[["mag"]]))^2)
1 - rss / tss

# held-out rows: same statistic, with the total sum of squares taken around
# the testing mean
testing_anova <- predict(fit_anova, newdata = testing_df)
rss <- sum((testing_df[["mag"]] - testing_anova)^2)
tss <- sum((testing_df[["mag"]] - mean(testing_df[["mag"]]))^2)
1 - rss / tss
# classification

# same rows as the regression split above, but with magnitude recoded into
# whole-number classes so the tree predicts a label
this_df <- quakes
this_df[["mag"]] <- as.factor(round(this_df[["mag"]]))
training_df <- this_df[training_records, ]
testing_df <- this_df[-training_records, ]

# classification tree of the magnitude class on every other column
fit_class <- rpart(mag ~ ., data = training_df, method = "class")

# share of training rows whose predicted class matches the observed class
training_class <- predict(fit_class, newdata = training_df, type = "class")
mean(training_class == training_df[["mag"]])

# the same share on the held-out rows
testing_class <- predict(fit_class, newdata = testing_df, type = "class")
mean(testing_class == testing_df[["mag"]])
# random forests #
library(randomForest)
data(quakes)

# seed is set once, ahead of the split and the fit below
set.seed(2023)

this_df <- quakes

# row numbers of the 70% training share, drawn without replacement
training_records <- sample(
  seq_len(nrow(this_df)),
  size = round(nrow(this_df) * 0.7),
  replace = FALSE
)

# recode magnitude into whole-number classes before splitting the rows
this_df[["mag"]] <- as.factor(round(this_df[["mag"]]))
training_df <- this_df[training_records, ]
testing_df <- this_df[-training_records, ]

# forest of the magnitude class on every other column, package defaults
fit_class <- randomForest(mag ~ ., data = training_df)

# share of training rows whose predicted class matches the observed class
training_class <- predict(fit_class, newdata = training_df)
mean(training_class == training_df[["mag"]])

# the same share on the held-out rows
testing_class <- predict(fit_class, newdata = testing_df)
mean(testing_class == testing_df[["mag"]])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment