Skip to content

Instantly share code, notes, and snippets.

@Aaronmoralesshildrick
Last active September 23, 2022 05:38
Show Gist options
  • Save Aaronmoralesshildrick/d5d5c9e83c96714782962a88105c95d4 to your computer and use it in GitHub Desktop.
Save Aaronmoralesshildrick/d5d5c9e83c96714782962a88105c95d4 to your computer and use it in GitHub Desktop.
##solar sesame assignment!!
#set wd using the session tab in Rstudio
#libraries
library(readr)
library(ggplot2)
library(dplyr)
library(plyr)
library("writexl")
library(lubridate)
#donwload data
call_data <- read_csv("call_data.csv")
View(call_data)
data <- call_data
data_answ <- data[data$Answered != 0, ]
data_unansw <- data[data$Answered == 0, ]
interested <- data[data$Interest !=0, ]
colnames(data) <- c("Agent_Experience","Agent ID", "Team","Territory",
"Interest","Answered","Call_ID", "Call_Start_Date",
"Call_Start_Time", "Call_Length", "US_State_of_Client",
"Policy_InForce?", "Policy_Monthly_Premium","Policy_Submitted?"
)
#exploratory analysis of the data
summary(data)
head(data)
hist(data_answ$Call_Length)
#for figuring out what times to be online
class(data$Call_Start_Time)
class(data$Call_Start_Date)
data$callDate <- mdy_hm(data$Call_Start_Date)
data$callTime <- mdy_hm(data$Call_Start_Time)
data_answ$callDate <- mdy_hm(data_answ$Call_Start_Date)
data_answ$callTime <- mdy_hm(data_answ$Call_Start_Time)
#get only the time
data_answ$callTimeonly <- hour(data_answ$callTime)
policy_holders_active$callTimeonly <- hour(policy_holders_active$callTime)
#see what days we have more calls answered
hist(data$callDate,"days")
hist(data_answ$callDate, "days")
#see what times are more popular for answering calls
time_of_answered_calls <- data.frame(count(data_answ, 'callTimeonly'))
ggplot(time_of_answered_calls, aes(x=time_of_answered_calls$callTimeonly,
y=time_of_answered_calls$freq)) +
geom_bar(stat = "identity",fill="lightblue") + theme_classic()
#from active policy holders... what days did they answer calls
hist(policy_holders_active$callDate, "days")
#from active policy holders... what time did they take the calls
time_of_active_holders <- data.frame(count(policy_holders_active, 'callTimeonly'))
ggplot(time_of_active_holders, aes(x=time_of_active_holders$callTimeonly,
y=time_of_active_holders$freq)) +
geom_bar(stat = "identity",fill="lightblue") + theme_classic()
#for figuring out what states to get licenced
#check which states have the most calls answered
df1 <- data.frame(count(data_answ,'US_State_of_Client'))
#check which states have most calls attempted
df2 <- data.frame(count(data,'US_State_of_Client'))
#this is followed up in sheets
#correlation between price and policy being active
cor(policy_holders[c("anual_premium","Policy_InForce?")])
ggplot(policy_holders, aes(x=policy_holders$`anual_premium`,
y=policy_holders$`Policy_InForce?`)) + geom_point()
#correlation between call time and policy in force
cor(policy_holders[c("Call_Length","Policy_InForce?")])
ggplot(policy_holders, aes(x=policy_holders$`Call_Length`,
y=policy_holders$`Policy_InForce?`)) + geom_point()
policy_holders <- data[data$`Policy_Submitted?` != 0, ]
policy_holders_active <- filter(policy_holders, policy_holders$`Policy_InForce?` != 0)
#calculating anual premium
policy_holders$anual_premium <- policy_holders$Policy_Monthly_Premium * 12
policy_holders_active$anual_premium <- policy_holders_active$Policy_Monthly_Premium * 12
#calculating policy submissions per state
policy_submissions_per_state <- data.frame(count(policy_holders$US_State_of_Client))
#calculating active policies per state
active_policies_per_state <-
data.frame(count(policy_holders_active$US_State_of_Client))
#calculating average anual policy price per state
policy_price_per_state <- data.frame(policy_holders %>%
group_by(US_State_of_Client) %>%
summarise_at(vars(anual_premium), list(name = mean)))
#interest per state
interest_per_state <- data.frame(count(interested$US_State_of_Client))
#Extra information
#high performing teams in number of calls
call_per_team <- data.frame(count(data$Team))
calls_answ_per_team <- data.frame(count(data_answ$Team))
#high performing teams in number of active policy holders
policies_per_team <- data.frame(count(policy_holders$Team))
View(policies_per_team)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment