Skip to content

Instantly share code, notes, and snippets.

@willium
Last active October 22, 2015 22:39
Show Gist options
  • Save willium/d86b94a078f50d4157ab to your computer and use it in GitHub Desktop.
Save willium/d86b94a078f50d4157ab to your computer and use it in GitHub Desktop.
Group Project 1, first R project
###############################################
# SETUP
###############################################
# Directory that holds data.csv.
# The original author's project folder is the default; set the environment
# variable GROUP1_DATA_DIR to run the script on another machine without
# editing it.
data_dir <- Sys.getenv(
  "GROUP1_DATA_DIR",
  unset = "/Users/willium/Documents/School/uw/2015-2016/Quarter 1/Stat 311/assignments/group-1"
)
# load data.csv into variable d
# - the path is built with file.path() instead of calling setwd(), so the
#   session's working directory is left untouched
# - as.is=TRUE keeps text columns as character vectors
# - na.strings: empty cells and the literal "NA" are both read as missing
d <- read.csv(
  file.path(data_dir, "data.csv"),
  header = TRUE,
  as.is = TRUE,
  na.strings = c("", "NA")
) # d is data
###############################################
# DATA MANIPULATION
###############################################
# backup old columns (prefix "o" = original, i.e. before the -100 shift)
d$oCollision <- d$Collision
d$oPropertyDamage <- d$Property.Damage
d$oComprehensive <- d$Comprehensive
# adjust columns by -100 so the "average insurance is 0"
# NOTE: Collision and Comprehensive are overwritten in place, while the
# shifted property-damage rating is stored in a NEW column, PropertyDamage;
# Property.Damage itself keeps the unshifted values. All plots and summaries
# below use PropertyDamage.
d$Collision <- d$Collision - 100
d$PropertyDamage <- d$Property.Damage - 100
d$Comprehensive <- d$Comprehensive - 100
# convert size to a factor; the level order fixes the left-to-right order of
# the groups in the formula-based boxplots, and any value not listed becomes NA
d$Size <- factor(d$Size, levels = c("Small", "Midsize", "Large", "VeryLarge"))
# break data up into subsets by size
# (taken before WD is added, so these subsets do not carry the WD column)
small <- subset(d, Size == "Small")
midsize <- subset(d, Size == "Midsize")
large <- subset(d, Size == "Large")
vlarge <- subset(d, Size == "VeryLarge")
# create WD (wheeldrive) column based on last 3 characters of Vehicle
wheeldrive <- substr(d$Vehicle, nchar(d$Vehicle) - 2, nchar(d$Vehicle))
# Blank every suffix that is not a drive type (e.g. the 'ght' entry) by
# content rather than by a hard-coded row number, so the result does not
# depend on the row order of data.csv.
wheeldrive[!(wheeldrive %in% c("2WD", "4WD"))] <- NA
d$WD <- factor(wheeldrive, levels = c("2WD", "4WD"))
# break data up into subsets by 2WD vs 4WD (or in this case wd2, wd4)
# subset() drops rows whose WD is NA
wd4 <- subset(d, WD == "4WD")
wd2 <- subset(d, WD == "2WD")
###############################################
# ALL DATA
###############################################
# lay the next four plots out on a 2x2 grid
par(mfrow = c(2, 2))
# (1) side-by-side boxplots of the three shifted ratings, all vehicles
boxplot(
  d[["Collision"]], d[["PropertyDamage"]], d[["Comprehensive"]],
  main = "All Ratings",
  names = c("Collision", "Property", "Comprehensive"),
  xlab = "Insurance Claim Type",
  ylab = "Loss Rating",
  ylim = c(-70, 70)
)
abline(h = 0, lty = 3, col = "red") # dotted red reference line at 0
# (2)-(4) one histogram per rating, all drawn on the same axis limits so the
# three panels can be compared directly
hist(
  d[["Collision"]],
  breaks = 10, xlim = c(-60, 80), ylim = c(0, 50),
  main = "Collision", xlab = "Loss Rating"
)
hist(
  d[["PropertyDamage"]],
  breaks = 10, xlim = c(-60, 80), ylim = c(0, 50),
  main = "Property Damage", xlab = "Loss Rating"
)
hist(
  d[["Comprehensive"]],
  breaks = 10, xlim = c(-60, 80), ylim = c(0, 50),
  main = "Comprehensive", xlab = "Loss Rating"
)
# Numeric summaries. summary() reports min, 1st quartile, median, mean,
# 3rd quartile and max; sd() is told to skip missing values.
# Collision rating
summary(d[["Collision"]])
sd(d[["Collision"]], na.rm = TRUE)
# Property Damage rating
summary(d[["PropertyDamage"]])
sd(d[["PropertyDamage"]], na.rm = TRUE)
# Comprehensive rating
summary(d[["Comprehensive"]])
sd(d[["Comprehensive"]], na.rm = TRUE)
###############################################
# RATINGS WITH PLOTS FOR EACH SIZE
###############################################
# 2x2 grid: one panel per vehicle-size subset
par(mfrow = c(2, 2))
# Panels (1)-(4): Small, Midsize, Large, Very Large, in that order.
# Each panel shows the three loss ratings of one size subset side by side,
# with a dotted red reference line at 0. All panels share the same y range.
invisible(Map(
  function(size_subset, panel_title) {
    boxplot(
      size_subset$Collision,
      size_subset$PropertyDamage,
      size_subset$Comprehensive,
      main = panel_title,
      names = c("Collision", "Property", "Comprehensive"),
      xlab = "Loss Type",
      ylab = "Loss Rating",
      ylim = c(-50, 50)
    )
    abline(h = 0, col = "red", lty = 3)
  },
  list(small, midsize, large, vlarge),
  c("Small", "Midsize", "Large", "Very Large")
))
###############################################
# RATINGS WITH PLOTS FOR EACH DRIVE TYPE
###############################################
# 1x2 grid: one panel per drive-type subset
par(mfrow = c(1, 2))
# Panel (1) is the 4WD subset, panel (2) the 2WD subset.
# Each panel shows the three loss ratings side by side, with a dotted red
# reference line at 0; both panels share the same y range.
invisible(Map(
  function(drive_subset, panel_title) {
    boxplot(
      drive_subset$Collision,
      drive_subset$PropertyDamage,
      drive_subset$Comprehensive,
      main = panel_title,
      names = c("Collision", "Property", "Comprehensive"),
      xlab = "Loss Type",
      ylab = "Loss Rating",
      ylim = c(-60, 80)
    )
    abline(h = 0, col = "red", lty = 3)
  },
  list(wd4, wd2),
  c("4WD", "2WD")
))
###############################################
# DRIVE TYPES FOR EACH RATING TYPE
###############################################
# create 1x3 grid for next (3) plots
par(mfrow = c(1, 3))
# In each panel the x axis groups vehicles by drive type (the levels of d$WD),
# so it is labelled "Drive Type"; the loss type is given by the panel title.
# Group labels come from the factor levels of WD ("2WD", "4WD"), so no
# explicit names= is needed. Rows whose WD is NA are not plotted.
# (1) boxplot of Collision rating for 2WD and 4WD
boxplot(Collision ~ WD, data = d, main = "Collision",
        xlab = "Drive Type", ylab = "Loss Rating", ylim = c(-60, 80))
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
# (2) boxplot of Property Damage rating for 2WD and 4WD
boxplot(PropertyDamage ~ WD, data = d, main = "Property Damage",
        xlab = "Drive Type", ylab = "Loss Rating", ylim = c(-60, 80))
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
# (3) boxplot of Comprehensive rating for 2WD and 4WD
boxplot(Comprehensive ~ WD, data = d, main = "Comprehensive",
        xlab = "Drive Type", ylab = "Loss Rating", ylim = c(-60, 80))
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
###############################################
# EACH RATING TYPE COMPARED THROUGH ALL SIZES
###############################################
# create 1x3 grid for next (3) plots
par(mfrow = c(1, 3))
# In each panel the x axis groups vehicles by size (the levels of d$Size, in
# the order Small, Midsize, Large, VeryLarge), so it is labelled
# "Vehicle Size"; the loss type is given by the panel title.
# (1) boxplot of Collision rating for small, mid, large, vlarge
boxplot(Collision ~ Size, data = d, main = "Collision",
        xlab = "Vehicle Size", ylab = "Loss Rating", ylim = c(-60, 80))
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
# (2) boxplot of Property Damage rating for small, mid, large, vlarge
boxplot(PropertyDamage ~ Size, data = d, main = "Property Damage",
        xlab = "Vehicle Size", ylab = "Loss Rating", ylim = c(-60, 80))
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
# (3) boxplot of Comprehensive rating for small, mid, large, vlarge
boxplot(Comprehensive ~ Size, data = d, main = "Comprehensive",
        xlab = "Vehicle Size", ylab = "Loss Rating", ylim = c(-60, 80))
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
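# # verbose alternative (disabled): the same three by-size plots, built from
# # the small/midsize/large/vlarge subsets instead of the formula interface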
# # (1) boxplot of Collision rating for small, mid, large, vlarge
# boxplot(small$Collision, midsize$Collision, large$Collision, vlarge$Collision, main="Collision", names=c("Small", "Midsize", "Large", "Very Large"), xlab="Loss Type", ylab="Loss Rating", ylim=c(-60,80))
# abline(h=0, col="red", lty=3) # draw dotted red line at 0
#
# # (2) boxplot of Property Damage rating for small, mid, large, vlarge
# boxplot(small$PropertyDamage, midsize$PropertyDamage, large$PropertyDamage, vlarge$PropertyDamage, main="Property Damage", names=c("Small", "Midsize", "Large", "Very Large"), xlab="Loss Type", ylab="Loss Rating", ylim=c(-60,80))
# abline(h=0, col="red", lty=3) # draw dotted red line at 0
#
# # (3) boxplot of Comprehensive rating for small, mid, large, vlarge
# boxplot(small$Comprehensive, midsize$Comprehensive, large$Comprehensive, vlarge$Comprehensive, main="Comprehensive", names=c("Small", "Midsize", "Large", "Very Large"), xlab="Loss Type", ylab="Loss Rating", ylim=c(-60,80))
# abline(h=0, col="red", lty=3) # draw dotted red line at 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment