Last active
September 23, 2022 05:38
-
-
Save Aaronmoralesshildrick/d5d5c9e83c96714782962a88105c95d4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##solar sesame assignment!! | |
#set wd using the session tab in Rstudio | |
#libraries | |
library(readr) | |
library(ggplot2) | |
library(dplyr) | |
library(plyr) | |
library("writexl") | |
library(lubridate) | |
#donwload data | |
call_data <- read_csv("call_data.csv") | |
View(call_data) | |
data <- call_data | |
data_answ <- data[data$Answered != 0, ] | |
data_unansw <- data[data$Answered == 0, ] | |
interested <- data[data$Interest !=0, ] | |
colnames(data) <- c("Agent_Experience","Agent ID", "Team","Territory", | |
"Interest","Answered","Call_ID", "Call_Start_Date", | |
"Call_Start_Time", "Call_Length", "US_State_of_Client", | |
"Policy_InForce?", "Policy_Monthly_Premium","Policy_Submitted?" | |
) | |
#exploratory analysis of the data | |
summary(data) | |
head(data) | |
hist(data_answ$Call_Length) | |
#for figuring out what times to be online | |
class(data$Call_Start_Time) | |
class(data$Call_Start_Date) | |
data$callDate <- mdy_hm(data$Call_Start_Date) | |
data$callTime <- mdy_hm(data$Call_Start_Time) | |
data_answ$callDate <- mdy_hm(data_answ$Call_Start_Date) | |
data_answ$callTime <- mdy_hm(data_answ$Call_Start_Time) | |
#get only the time | |
data_answ$callTimeonly <- hour(data_answ$callTime) | |
policy_holders_active$callTimeonly <- hour(policy_holders_active$callTime) | |
#see what days we have more calls answered | |
hist(data$callDate,"days") | |
hist(data_answ$callDate, "days") | |
#see what times are more popular for answering calls | |
time_of_answered_calls <- data.frame(count(data_answ, 'callTimeonly')) | |
ggplot(time_of_answered_calls, aes(x=time_of_answered_calls$callTimeonly, | |
y=time_of_answered_calls$freq)) + | |
geom_bar(stat = "identity",fill="lightblue") + theme_classic() | |
#from active policy holders... what days did they answer calls | |
hist(policy_holders_active$callDate, "days") | |
#from active policy holders... what time did they take the calls | |
time_of_active_holders <- data.frame(count(policy_holders_active, 'callTimeonly')) | |
ggplot(time_of_active_holders, aes(x=time_of_active_holders$callTimeonly, | |
y=time_of_active_holders$freq)) + | |
geom_bar(stat = "identity",fill="lightblue") + theme_classic() | |
#for figuring out what states to get licenced | |
#check which states have the most calls answered | |
df1 <- data.frame(count(data_answ,'US_State_of_Client')) | |
#check which states have most calls attempted | |
df2 <- data.frame(count(data,'US_State_of_Client')) | |
#this is followed up in sheets | |
#correlation between price and policy being active | |
cor(policy_holders[c("anual_premium","Policy_InForce?")]) | |
ggplot(policy_holders, aes(x=policy_holders$`anual_premium`, | |
y=policy_holders$`Policy_InForce?`)) + geom_point() | |
#correlation between call time and policy in force | |
cor(policy_holders[c("Call_Length","Policy_InForce?")]) | |
ggplot(policy_holders, aes(x=policy_holders$`Call_Length`, | |
y=policy_holders$`Policy_InForce?`)) + geom_point() | |
policy_holders <- data[data$`Policy_Submitted?` != 0, ] | |
policy_holders_active <- filter(policy_holders, policy_holders$`Policy_InForce?` != 0) | |
#calculating anual premium | |
policy_holders$anual_premium <- policy_holders$Policy_Monthly_Premium * 12 | |
policy_holders_active$anual_premium <- policy_holders_active$Policy_Monthly_Premium * 12 | |
#calculating policy submissions per state | |
policy_submissions_per_state <- data.frame(count(policy_holders$US_State_of_Client)) | |
#calculating active policies per state | |
active_policies_per_state <- | |
data.frame(count(policy_holders_active$US_State_of_Client)) | |
#calculating average anual policy price per state | |
policy_price_per_state <- data.frame(policy_holders %>% | |
group_by(US_State_of_Client) %>% | |
summarise_at(vars(anual_premium), list(name = mean))) | |
#interest per state | |
interest_per_state <- data.frame(count(interested$US_State_of_Client)) | |
#Extra information | |
#high performing teams in number of calls | |
call_per_team <- data.frame(count(data$Team)) | |
calls_answ_per_team <- data.frame(count(data_answ$Team)) | |
#high performing teams in number of active policy holders | |
policies_per_team <- data.frame(count(policy_holders$Team)) | |
View(policies_per_team) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment