# PLagat/Survey of laptop usage experience.r

## Survey of laptop usage experience.r
# The aim of this group study is to determine how many SIMS students find the
# laptops provided by the University efficient enough for day-to-day schoolwork
# and other uses, and how many opt for other options instead.

library(MASS)
library(psych)

# LOAD DATASET
# Read the exported form responses from the Desktop folder, addressed relative
# to the directory the script is launched from. The path is built with
# file.path() instead of calling setwd(), so loading the data does not change
# the session's working directory as a side effect.
csv_path <- file.path(
  "Desktop",
  "fin THE EFFICIENCY OF SIMS UNDERGRADUATE STUDENT'S LAPTOPS (Responses) - Form responses 1.csv"
)
data <- read.csv(csv_path)

# Inspect the column types and the column names produced by read.csv()
str(data)
colnames(data)

# Drop the columns that are not used in the analysis below: timestamp, e-mail
# address, year of study, and five long-form / choose-more-than-one questions.
data_drop <- subset(
  data,
  select = -c(
    Timestamp,
    Email.address,
    What.year.are.you.in.,
    If.no.what.personal.device.do.you.use..specify.brand..and.why.do.you.use.the.specified.device.over.the.Strathmore.laptop.,
    Which.of.the.following.browsers.do.you.have.installed.on.your.device...choose.more.than.1.,
    Which.of.the.following.data.analysis.software.do.you.have.installed.on.your.device...choose.more.than.1.,
    What.issues.in.terms.of.both.software.and.hardware.has.your.laptop.had.before.and.still.currently.has...choose.more.than.1.,
    If.you.were.to.get.a.new.device.from.Strathmore.what.changes.in.terms.of.both.hardware.and.software.would.you.expect.to.see.in.the.new.device.
  )
)

names(data_drop)

# The remaining columns are renamed purely by position, so verify the column
# count first: assigning seven names to a wider data frame would leave NA
# column names instead of raising an error.
new_names <- c(
  "strath_laptop", "speed", "missing_screws", "browsers",
  "software", "issues", "satisfaction"
)
stopifnot(ncol(data_drop) == length(new_names))
names(data_drop) <- new_names
names(data_drop)

laptop <- data_drop

# Converting to factor
# strath_laptop: two fixed levels, "No" first; any other value becomes NA.
laptop[["strath_laptop"]] <- factor(
  laptop[["strath_laptop"]],
  levels = c("No", "Yes")
)
# speed: unordered factor with levels taken from the observed values.
laptop[["speed"]] <- factor(laptop[["speed"]])
# satisfaction: ordered factor, as required for the response of polr().
# NOTE(review): level order is the default sort order of the observed values;
# confirm this matches the intended satisfaction scale.
laptop[["satisfaction"]] <- as.ordered(laptop[["satisfaction"]])
str(laptop)

# Frequency tables (in percent) for the categorical variables
prop.table(table(laptop$strath_laptop)) * 100
prop.table(table(laptop$speed)) * 100
prop.table(table(laptop$satisfaction)) * 100
# Findings recorded by the authors: 86% of the sample have Strathmore laptops,
# speed lies mostly between 3 and 4, and the majority are neutral when it
# comes to satisfaction.

# Cross-tabulation of satisfaction against speed
xtabs(~ satisfaction + speed, data = laptop)

# Numeric analysis: scatterplot-matrix panel (psych::pairs.panels) for the four
# count-style columns, followed by a per-column summary of the whole data frame
pairs.panels(
  laptop[, c("missing_screws", "browsers", "software", "issues"), drop = FALSE]
)
summary(laptop)

# Partition data into train and test.
# Each row is assigned independently to group 1 (train, probability 0.8) or
# group 2 (test, probability 0.2), so the two set sizes depend on the draw.
# Fixing the seed makes the split, and every result derived from it,
# reproducible between runs.
set.seed(1234)
part <- sample(2, nrow(laptop), replace = TRUE, prob = c(0.8, 0.2))
train <- laptop[part == 1, ]
test <- laptop[part == 2, ]
nrow(train)
nrow(test)
# The authors' original, unseeded run recorded 15 training rows and 7 test rows.

# Fit an ordinal logistic regression of satisfaction on the other survey
# variables using the training rows. Hess = TRUE stores the Hessian, which
# summary() needs to compute standard errors.
model <- polr(
  satisfaction ~ speed + missing_screws + browsers + software + issues,
  data = train,
  Hess = TRUE
)
summary(model)

# Obtaining p values: take the coefficient table, derive two-sided p values
# from the "t value" column via the standard normal distribution, and append
# them as an extra column.
ctable <- coef(summary(model))
ctable
p <- 2 * pnorm(abs(ctable[, "t value"]), lower.tail = FALSE)
ctable <- cbind(ctable, "p value" = p)
ctable
# Authors' recorded finding: no variable is statistically significant

# Predicted satisfaction class for every training row
pred <- predict(model, newdata = train, type = "class")
pred

# Confusion matrix for training data (rows: predicted, columns: observed)
tab <- table(pred, train$satisfaction)
tab
# Misclassification rate = 1 - share of counts on the diagonal
1 - sum(diag(tab)) / sum(tab)
# Authors' recorded result: misclassification error of 27%,
# i.e. an accuracy score of 73% for training data

# Predicted satisfaction class for every held-out test row
pred1 <- predict(model, newdata = test, type = "class")
pred1

# Confusion matrix for test data (rows: predicted, columns: observed)
tab1 <- table(pred1, test$satisfaction)
tab1
# Misclassification rate = 1 - share of counts on the diagonal
1 - sum(diag(tab1)) / sum(tab1)
# Authors' recorded result: misclassification error of 57%,
# i.e. an accuracy score of 43% for test data
	# End of analysis. The pipeline above is meant to run once, top to bottom:
	# running it a second time in the same session would reload the data and
	# overwrite `laptop`, `train`, `test` and `model`.