Skip to content

Instantly share code, notes, and snippets.

@milesgrimshaw
Last active August 29, 2015 13:57
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save milesgrimshaw/9414109 to your computer and use it in GitHub Desktop.
Save milesgrimshaw/9414109 to your computer and use it in GitHub Desktop.
Data prep for geocoding
# Load desired packages
library(lubridate)
library(stringr)
library(ggplot2)
library(scales)
# Print the current working directory, then switch to the project folder so
# that the relative paths used below resolve.
getwd()
setwd("~/Desktop/Patreon/")

# Load the raw project export. The file has no header row; as.is = TRUE keeps
# text columns as character vectors instead of converting them to factors.
d <- read.csv(
  file = "Kickstarter/kickstarter_projects.csv",
  header = FALSE,
  as.is = TRUE
)

# Rename the columns
colnames(d) <- c(
  "url", "backers", "goal", "pledged", "start",
  "end", "category", "location", "profile"
)

# Eliminate blanks: keep only rows whose goal is present and is not the
# literal string "[]"
d <- d[!is.na(d$goal) & d$goal != "[]", ]
# Get all the unique locations
# locations <- unique(d$location)
# Write the unique locations to a CSV for geocoding
# locations <- data.frame(locations)
# write.csv(locations,file="Kickstarter_Locations.csv", row.names=FALSE)
# Load the geocoded locations CSV (no header row; text columns stay character)
l <- read.csv(
  file = "Kickstarter/kickstarter_locations_coded.csv",
  header = FALSE,
  as.is = TRUE
)

# Rename the columns
colnames(l) <- c("location", "lat", "lon", "geo")
# Subset because some projects haven't ended: keep rows with a non-empty end
d <- d[which(d$end != ""), ]

# Check no NAs: prints the indices of rows with a missing end (expect none)
which(is.na(d$end))

# Create an end date variable. Only the first 10 characters of the raw string
# are parsed (as "%Y-%m-%d"), so anything after the date is discarded.
# as.POSIXct() is vectorised, so the whole column is converted in one call;
# unlike an element-wise sapply(), this also works when `d` has zero rows.
d$end <- as.POSIXct(substr(d$end, 1, 10), format = "%Y-%m-%d")
# Merge the two data sets, giving each project a lat/lon looked up by its
# location string in the geocoded table `l`.
# match() returns the first matching row of `l` and NA when a location is not
# present there, so each result is always a plain vector with one entry per
# project. (An sapply()/which() lookup returns a list as soon as any location
# has zero or several matches, which breaks the data.frame()/CSV steps below.)
loc_idx <- match(d$location, l$location)
d$lat <- l$lat[loc_idx]
d$lon <- l$lon[loc_idx]
# Total pledged to projects whose pledged amount reached their goal
with(
  d,
  sum(as.numeric(pledged)[which(as.numeric(pledged) >= as.numeric(goal))])
)

# Total pledged across all projects
with(d, sum(as.numeric(pledged)))
# Collect the columns needed for upload into a standalone data frame.
# The arguments are deliberately left unnamed: data.frame() then derives the
# column names from the expressions (d.pledged, d.end, ...).
df <- data.frame(
  d$pledged, d$end, d$category,
  d$location, d$lat, d$lon
)

# Save the CSV, omitting row names
write.csv(
  x = df,
  file = "kickstarter_pledged_locations_for_upload.csv",
  row.names = FALSE
)
# Restrict to projects whose pledged amount is at least their goal
d <- d[with(d, which(as.numeric(pledged) >= as.numeric(goal))), ]

# Rebuild the upload data frame from the reduced set. The arguments stay
# unnamed so the column names are derived as d.pledged, d.end, ...
df <- data.frame(
  d$pledged, d$end, d$category,
  d$location, d$lat, d$lon
)

# Save the CSV, omitting row names
write.csv(
  x = df,
  file = "kickstarter_successful_locations_for_upload.csv",
  row.names = FALSE
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment