Aaronmoralesshildrick/SolarSesame

## SolarSesame
##solar sesame assignment!!

#set wd using the session tab in Rstudio

#libraries
library(readr)
library(ggplot2)
library(dplyr)
library(plyr)
library("writexl")
library(lubridate)

#donwload data
call_data <- read_csv("call_data.csv")
View(call_data)
data <- call_data
data_answ <- data[data$Answered != 0, ]
data_unansw <- data[data$Answered == 0, ]
interested <- data[data$Interest !=0, ]
colnames(data) <- c("Agent_Experience","Agent ID", "Team","Territory",
                    "Interest","Answered","Call_ID", "Call_Start_Date",
                    "Call_Start_Time", "Call_Length", "US_State_of_Client",
                    "Policy_InForce?", "Policy_Monthly_Premium","Policy_Submitted?"
)


#exploratory analysis of the data
summary(data)
head(data)
hist(data_answ$Call_Length)

#for figuring out what times to be online
class(data$Call_Start_Time)
class(data$Call_Start_Date)
data$callDate <- mdy_hm(data$Call_Start_Date)
data$callTime <- mdy_hm(data$Call_Start_Time)
data_answ$callDate <- mdy_hm(data_answ$Call_Start_Date)
data_answ$callTime <- mdy_hm(data_answ$Call_Start_Time)
#get only the time
data_answ$callTimeonly <- hour(data_answ$callTime)
policy_holders_active$callTimeonly <- hour(policy_holders_active$callTime)

#see what days we have more calls answered
hist(data$callDate,"days")
hist(data_answ$callDate, "days")

#see what times are more popular for answering calls
time_of_answered_calls <- data.frame(count(data_answ, 'callTimeonly'))
ggplot(time_of_answered_calls, aes(x=time_of_answered_calls$callTimeonly,
                                   y=time_of_answered_calls$freq)) +
  geom_bar(stat = "identity",fill="lightblue") + theme_classic()


#from active policy holders... what days did they answer calls
hist(policy_holders_active$callDate, "days")

#from active policy holders... what time did they take the calls
time_of_active_holders <- data.frame(count(policy_holders_active, 'callTimeonly'))
ggplot(time_of_active_holders, aes(x=time_of_active_holders$callTimeonly,
                                   y=time_of_active_holders$freq)) +
  geom_bar(stat = "identity",fill="lightblue") + theme_classic()


#for figuring out what states to get licenced
#check which states have the most calls answered
df1 <- data.frame(count(data_answ,'US_State_of_Client'))
#check which states have most calls attempted
df2 <- data.frame(count(data,'US_State_of_Client'))
#this is followed up in sheets

#correlation between price and policy being active
cor(policy_holders[c("anual_premium","Policy_InForce?")])
ggplot(policy_holders, aes(x=policy_holders$`anual_premium`,
                           y=policy_holders$`Policy_InForce?`)) + geom_point()

#correlation between call time and policy in force
cor(policy_holders[c("Call_Length","Policy_InForce?")])
ggplot(policy_holders, aes(x=policy_holders$`Call_Length`,
                           y=policy_holders$`Policy_InForce?`)) + geom_point()


policy_holders <- data[data$`Policy_Submitted?` != 0, ]
policy_holders_active <- filter(policy_holders, policy_holders$`Policy_InForce?` != 0)

#calculating anual premium
policy_holders$anual_premium <- policy_holders$Policy_Monthly_Premium * 12
policy_holders_active$anual_premium <- policy_holders_active$Policy_Monthly_Premium * 12

#calculating policy submissions per state
policy_submissions_per_state <- data.frame(count(policy_holders$US_State_of_Client))
#calculating active policies per state
active_policies_per_state <-
  data.frame(count(policy_holders_active$US_State_of_Client))
#calculating average anual policy price per state
policy_price_per_state <- data.frame(policy_holders %>%
  group_by(US_State_of_Client) %>%
  summarise_at(vars(anual_premium), list(name = mean)))


#interest per state
interest_per_state <- data.frame(count(interested$US_State_of_Client))


#Extra information
#high performing teams in number of calls
call_per_team <- data.frame(count(data$Team))
calls_answ_per_team <- data.frame(count(data_answ$Team))

#high performing teams in number of active policy holders
policies_per_team <- data.frame(count(policy_holders$Team))
View(policies_per_team)
	##solar sesame assignment!!

	#set wd using the session tab in Rstudio

	#libraries
	library(readr)
	library(ggplot2)
	library(dplyr)
	library(plyr)
	library("writexl")
	library(lubridate)

	#donwload data
	call_data <- read_csv("call_data.csv")
	View(call_data)
	data <- call_data
	data_answ <- data[data$Answered != 0, ]
	data_unansw <- data[data$Answered == 0, ]
	interested <- data[data$Interest !=0, ]
	colnames(data) <- c("Agent_Experience","Agent ID", "Team","Territory",
	"Interest","Answered","Call_ID", "Call_Start_Date",
	"Call_Start_Time", "Call_Length", "US_State_of_Client",
	"Policy_InForce?", "Policy_Monthly_Premium","Policy_Submitted?"
	)


	#exploratory analysis of the data
	summary(data)
	head(data)
	hist(data_answ$Call_Length)

	#for figuring out what times to be online
	class(data$Call_Start_Time)
	class(data$Call_Start_Date)
	data$callDate <- mdy_hm(data$Call_Start_Date)
	data$callTime <- mdy_hm(data$Call_Start_Time)
	data_answ$callDate <- mdy_hm(data_answ$Call_Start_Date)
	data_answ$callTime <- mdy_hm(data_answ$Call_Start_Time)
	#get only the time
	data_answ$callTimeonly <- hour(data_answ$callTime)
	policy_holders_active$callTimeonly <- hour(policy_holders_active$callTime)

	#see what days we have more calls answered
	hist(data$callDate,"days")
	hist(data_answ$callDate, "days")

	#see what times are more popular for answering calls
	time_of_answered_calls <- data.frame(count(data_answ, 'callTimeonly'))
	ggplot(time_of_answered_calls, aes(x=time_of_answered_calls$callTimeonly,
	y=time_of_answered_calls$freq)) +
	geom_bar(stat = "identity",fill="lightblue") + theme_classic()


	#from active policy holders... what days did they answer calls
	hist(policy_holders_active$callDate, "days")

	#from active policy holders... what time did they take the calls
	time_of_active_holders <- data.frame(count(policy_holders_active, 'callTimeonly'))
	ggplot(time_of_active_holders, aes(x=time_of_active_holders$callTimeonly,
	y=time_of_active_holders$freq)) +
	geom_bar(stat = "identity",fill="lightblue") + theme_classic()






	#for figuring out what states to get licenced
	#check which states have the most calls answered
	df1 <- data.frame(count(data_answ,'US_State_of_Client'))
	#check which states have most calls attempted
	df2 <- data.frame(count(data,'US_State_of_Client'))
	#this is followed up in sheets

	#correlation between price and policy being active
	cor(policy_holders[c("anual_premium","Policy_InForce?")])
	ggplot(policy_holders, aes(x=policy_holders$`anual_premium`,
	y=policy_holders$`Policy_InForce?`)) + geom_point()

	#correlation between call time and policy in force
	cor(policy_holders[c("Call_Length","Policy_InForce?")])
	ggplot(policy_holders, aes(x=policy_holders$`Call_Length`,
	y=policy_holders$`Policy_InForce?`)) + geom_point()



	policy_holders <- data[data$`Policy_Submitted?` != 0, ]
	policy_holders_active <- filter(policy_holders, policy_holders$`Policy_InForce?` != 0)

	#calculating anual premium
	policy_holders$anual_premium <- policy_holders$Policy_Monthly_Premium * 12
	policy_holders_active$anual_premium <- policy_holders_active$Policy_Monthly_Premium * 12

	#calculating policy submissions per state
	policy_submissions_per_state <- data.frame(count(policy_holders$US_State_of_Client))
	#calculating active policies per state
	active_policies_per_state <-
	data.frame(count(policy_holders_active$US_State_of_Client))
	#calculating average anual policy price per state
	policy_price_per_state <- data.frame(policy_holders %>%
	group_by(US_State_of_Client) %>%
	summarise_at(vars(anual_premium), list(name = mean)))


	#interest per state
	interest_per_state <- data.frame(count(interested$US_State_of_Client))










	#Extra information
	#high performing teams in number of calls
	call_per_team <- data.frame(count(data$Team))
	calls_answ_per_team <- data.frame(count(data_answ$Team))

	#high performing teams in number of active policy holders
	policies_per_team <- data.frame(count(policy_holders$Team))
	View(policies_per_team)