dougyoung/MultivariableLogisticRegressionExploration.R

## MultivariableLogisticRegressionExploration.R
# Work out of the folder that holds the exported lead data.
# NOTE(review): hard-coded, machine-specific path; the script will not run on
# another machine without editing this line.
setwd("/Users/dougyoung/Downloads")

# Read the lead export; text columns are converted to factors on load
inboundLeadData <- read.csv(
  file = "Inbound Lead Regression Analysis Data.csv",
  stringsAsFactors = TRUE
)

# First look at the raw table: type, shape, column names, leading rows,
# per-column summary, and the categories of Size
class(inboundLeadData)
dim(inboundLeadData)
names(inboundLeadData)
head(inboundLeadData)
summary(inboundLeadData)
levels(inboundLeadData[["Size"]])

# Keep every row but only columns 3-7, 9 and 15.
# NOTE(review): positional selection silently picks the wrong columns if the
# CSV layout changes; selecting by name would be sturdier.
inboundLeadData <- inboundLeadData[c(3:7, 9, 15)]

# Same look at the trimmed table
class(inboundLeadData)
dim(inboundLeadData)
names(inboundLeadData)
head(inboundLeadData)
summary(inboundLeadData)
levels(inboundLeadData[["Size"]])

# Report the class of each remaining column
lapply(X = inboundLeadData, FUN = class)

# One-predictor logistic regressions of Won, fitted one at a time.
# After each fit: list the predictor's levels, plot the coefficient
# estimates, and print them from largest to smallest.
# `model` is overwritten by every fit.

# Predictor: Current.Snack.Spend..L.
model <- glm(
  formula = Won ~ Current.Snack.Spend..L.,
  family = binomial(link = "logit"),
  data = inboundLeadData
)

levels(inboundLeadData[["Current.Snack.Spend..L."]])
plot(model$coefficients)
sort(coef(model), decreasing = TRUE)

# Predictor: Would.spend..299.on.snacks...L.
model <- glm(
  formula = Won ~ Would.spend..299.on.snacks...L.,
  family = binomial(link = "logit"),
  data = inboundLeadData
)

levels(inboundLeadData[["Would.spend..299.on.snacks...L."]])
plot(model$coefficients)
sort(coef(model), decreasing = TRUE)

# Predictor: Size
model <- glm(
  formula = Won ~ Size,
  family = binomial(link = "logit"),
  data = inboundLeadData
)

levels(inboundLeadData[["Size"]])
plot(model$coefficients)
sort(coef(model), decreasing = TRUE)

# Multivariable logistic regression: Won explained by five categorical
# predictors at once (binomial family, logit link).
# TODO: Why warning?
# NOTE(review): presumably glm.fit reporting fitted probabilities of 0 or 1
# (sparse factor levels separating the outcome) -- confirm with warnings().
model <- glm(
  formula = Won ~ Current.Snack.Spend..L. + Current.Snack.Offering..L. +
    Would.spend..299.on.snacks...L. + Size +
    Who.makes.the.snack.buying.decisions.,
  family = binomial(link = "logit"),
  data = inboundLeadData
)

# Let's make a prediction!
# Score one hypothetical lead with the most recently fitted `model`.
# type = "response" returns the prediction on the response (probability)
# scale instead of the link (log-odds) scale.
# Only the predictors that appear in the model formula are supplied; a
# Job.Title value used to be passed as well, but the formula has no Job.Title
# term, so it never influenced the result.
# The generic predict() dispatches to the glm method for a glm fit.
# Each value must be a level seen when the model was fitted, otherwise
# prediction stops with a "new levels" error.
predict(
  model,
  newdata = data.frame(
    Current.Snack.Spend..L. = as.factor("Not sure, but a little"),
    Current.Snack.Offering..L. = as.factor("Local Grocery Store"),
    Would.spend..299.on.snacks...L. = as.factor("No"),
    Size = as.factor("41-60"),
    Who.makes.the.snack.buying.decisions. = as.factor("Me and only me")
  ),
  type = "response"
)

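# Notes:
# 1. Job.Title has too many distinct levels (each one costs a degree of
#    freedom), so it is left out of the model.
# 2. Size is a range bucket (e.g. '41-60'); it should be modeled as a number
#    instead of an unordered factor.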
	# NOTE(review): this section repeats the load-and-inspect steps performed
	# earlier in this file.
	# Work out of the folder that holds the exported lead data.
	# NOTE(review): hard-coded, machine-specific path; the script will not run
	# on another machine without editing this line.
	setwd("/Users/dougyoung/Downloads")

	# Read the lead export; text columns are converted to factors on load
	inboundLeadData <- read.csv(
	  file = "Inbound Lead Regression Analysis Data.csv",
	  stringsAsFactors = TRUE
	)

	# First look at the raw table: type, shape, column names, leading rows,
	# per-column summary, and the categories of Size
	class(inboundLeadData)
	dim(inboundLeadData)
	names(inboundLeadData)
	head(inboundLeadData)
	summary(inboundLeadData)
	levels(inboundLeadData[["Size"]])

	# Keep every row but only columns 3-7, 9 and 15.
	# NOTE(review): positional selection silently picks the wrong columns if
	# the CSV layout changes; selecting by name would be sturdier.
	inboundLeadData <- inboundLeadData[c(3:7, 9, 15)]

	# Same look at the trimmed table
	class(inboundLeadData)
	dim(inboundLeadData)
	names(inboundLeadData)
	head(inboundLeadData)
	summary(inboundLeadData)
	levels(inboundLeadData[["Size"]])

	# Report the class of each remaining column
	lapply(X = inboundLeadData, FUN = class)

	# NOTE(review): this section repeats the single-predictor fits performed
	# earlier in this file.
	# One-predictor logistic regressions of Won, fitted one at a time.
	# After each fit: list the predictor's levels, plot the coefficient
	# estimates, and print them from largest to smallest.
	# `model` is overwritten by every fit.

	# Predictor: Current.Snack.Spend..L.
	model <- glm(
	  formula = Won ~ Current.Snack.Spend..L.,
	  family = binomial(link = "logit"),
	  data = inboundLeadData
	)

	levels(inboundLeadData[["Current.Snack.Spend..L."]])
	plot(model$coefficients)
	sort(coef(model), decreasing = TRUE)

	# Predictor: Would.spend..299.on.snacks...L.
	model <- glm(
	  formula = Won ~ Would.spend..299.on.snacks...L.,
	  family = binomial(link = "logit"),
	  data = inboundLeadData
	)

	levels(inboundLeadData[["Would.spend..299.on.snacks...L."]])
	plot(model$coefficients)
	sort(coef(model), decreasing = TRUE)

	# Predictor: Size
	model <- glm(
	  formula = Won ~ Size,
	  family = binomial(link = "logit"),
	  data = inboundLeadData
	)

	levels(inboundLeadData[["Size"]])
	plot(model$coefficients)
	sort(coef(model), decreasing = TRUE)

	# NOTE(review): this section repeats the multivariable fit performed
	# earlier in this file.
	# Multivariable logistic regression: Won explained by five categorical
	# predictors at once (binomial family, logit link).
	# TODO: Why warning?
	# NOTE(review): presumably glm.fit reporting fitted probabilities of 0 or 1
	# (sparse factor levels separating the outcome) -- confirm with warnings().
	model <- glm(
	  formula = Won ~ Current.Snack.Spend..L. + Current.Snack.Offering..L. +
	    Would.spend..299.on.snacks...L. + Size +
	    Who.makes.the.snack.buying.decisions.,
	  family = binomial(link = "logit"),
	  data = inboundLeadData
	)

	# Let's make a prediction!
	# NOTE(review): this section repeats the prediction made earlier in this
	# file.
	# Score one hypothetical lead with the most recently fitted `model`.
	# type = "response" returns the prediction on the response (probability)
	# scale instead of the link (log-odds) scale.
	# Only the predictors that appear in the model formula are supplied; a
	# Job.Title value used to be passed as well, but the formula has no
	# Job.Title term, so it never influenced the result.
	# The generic predict() dispatches to the glm method for a glm fit.
	# Each value must be a level seen when the model was fitted, otherwise
	# prediction stops with a "new levels" error.
	predict(
	  model,
	  newdata = data.frame(
	    Current.Snack.Spend..L. = as.factor("Not sure, but a little"),
	    Current.Snack.Offering..L. = as.factor("Local Grocery Store"),
	    Would.spend..299.on.snacks...L. = as.factor("No"),
	    Size = as.factor("41-60"),
	    Who.makes.the.snack.buying.decisions. = as.factor("Me and only me")
	  ),
	  type = "response"
	)

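	# Notes:
	# 1. Job.Title has too many distinct levels (each one costs a degree of
	#    freedom), so it is left out of the model.
	# 2. Size is a range bucket (e.g. '41-60'); it should be modeled as a number
	#    instead of an unordered factor.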