# Prepare the Kickstarter dataset for analysis
# Read the raw Kickstarter export; the first row holds the column names.
kickstarter <- fread(file = "./data/ks-projects-201801.csv", header = TRUE)

# Convert deadline values to Date type.
# `format` is passed by name so it can never be matched positionally to a
# different parameter of an as.Date() method.
kickstarter$deadline <- as.Date(kickstarter$deadline, format = "%Y-%m-%d")

# Convert launched values to Date type; the time-of-day part is parsed and
# then dropped because the result is a Date.
kickstarter$launched <- as.Date(
  kickstarter$launched,
  format = "%Y-%m-%d %H:%M:%S"
)

# Remove observations that have incorrect launch dates (year says "1970").
# The rows are identified by their parsed launch year rather than by
# hard-coded row numbers, so the clean-up stays correct if the CSV is
# re-sorted or refreshed. `%in%` never returns NA, so rows whose launch date
# could not be parsed are kept rather than silently dropped.
launched_in_1970 <- format(kickstarter$launched, "%Y") %in% "1970"
kickstarter <- kickstarter[!launched_in_1970, ]

# Add a new column for project duration: deadline minus launch date
# (subtracting two Dates yields a difftime measured in days).
kickstarter$project_duration_days <- kickstarter$deadline - kickstarter$launched
