Skip to content

Instantly share code, notes, and snippets.

@kzfm
Last active December 16, 2015 17:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kzfm/5472037 to your computer and use it in GitHub Desktop.
Save kzfm/5472037 to your computer and use it in GitHub Desktop.
ml1
# Load the raw UFO sightings file and keep only rows with well-formed dates.
#
# NOTE(review): setwd() to a hard-coded, machine-specific directory makes this
# script non-portable; it is kept because the relative data path below depends
# on it. Prefer running the script from the project directory instead.
setwd("/Users/kzfm/lang/rcode/ML_for_Hackers/01-Introduction/")

# Tab-separated, no header row; empty fields are read as NA and strings are
# kept as character vectors rather than factors.
ufo <- read.delim("data/ufo/ufo_awesome.tsv",
                  sep = "\t",
                  stringsAsFactors = FALSE,
                  header = FALSE,
                  na.strings = "")
names(ufo) <- c("DateOccurred", "DateReported", "Location",
                "ShortDescription", "Duration", "LongDescription")

# A usable date field is exactly 8 characters long (parsed below as %Y%m%d).
# nchar() yields NA for a missing string, and an NA inside a logical row index
# produces an all-NA row instead of dropping the row, so NA is mapped to FALSE.
good.rows <- nchar(ufo$DateOccurred) == 8 & nchar(ufo$DateReported) == 8
good.rows[is.na(good.rows)] <- FALSE
ufo <- ufo[good.rows, ]

# Convert both date columns from "YYYYMMDD" strings to Date objects.
ufo$DateOccurred <- as.Date(ufo$DateOccurred, format = "%Y%m%d")
ufo$DateReported <- as.Date(ufo$DateReported, format = "%Y%m%d")
# Split a "City, State" location string into its two components.
#
# Args:
#   l: a single location string.
#
# Returns:
#   A length-2 character vector c(city, state), with one leading space removed
#   from each part, or c(NA, NA) when `l` does not split on commas into exactly
#   two parts (or cannot be split at all).
get.location <- function(l) {
  # strsplit() signals an error for non-character input (e.g. a logical NA);
  # fall back to a pair of NAs in that case.
  split.location <- tryCatch(strsplit(l, ",")[[1]],
                             error = function(e) c(NA, NA))
  clean.location <- gsub("^ ", "", split.location)
  # Accept exactly two parts. A one-part result (no comma) would be silently
  # recycled into both columns when results are row-bound, and a zero-part
  # result (empty string) would be dropped by rbind(), misaligning the rows.
  if (length(clean.location) != 2) {
    return(c(NA, NA))
  }
  clean.location
}
# Derive city and state columns from the free-text Location field and keep
# only the rows whose state is a recognised US state abbreviation.

# One get.location() result per row, stacked into a matrix (one row per record).
city.state <- lapply(ufo[["Location"]], get.location)
location.matrix <- do.call(rbind, city.state)

# First matrix column is the city, second is the state.
ufo <- transform(
  ufo,
  USCity = location.matrix[, 1],
  USState = location.matrix[, 2],
  stringsAsFactors = FALSE
)

# Values that are not in state.abb become NA (the match is case-sensitive).
ufo$USState <- state.abb[match(ufo$USState, state.abb)]
# A city without a valid state is blanked out as well.
ufo[is.na(ufo$USState), "USCity"] <- NA

# Keep only rows with a valid state.
ufo.us <- ufo[!is.na(ufo$USState), ]
###
# Build a complete State x YearMonth table of sighting counts, with zero for
# every state/month combination that has no sightings.

# ddply() and .() come from plyr, which the script never loaded.
library(plyr)

# The grouping below needs a year-month key per sighting; it was never created.
ufo.us$YearMonth <- strftime(ufo.us$DateOccurred, format = "%Y-%m")

# Number of sightings per state and month (count column is named V1 by ddply).
sightings.counts <- ddply(ufo.us, .(USState, YearMonth), nrow)

# Monthly sequence spanning the observed dates. na.rm = TRUE because dates
# that failed to parse are NA and would otherwise make min()/max() NA.
date.range <- seq.Date(from = min(ufo.us$DateOccurred, na.rm = TRUE),
                       to = max(ufo.us$DateOccurred, na.rm = TRUE),
                       by = "month")
date.strings <- strftime(date.range, "%Y-%m")

# Every state paired with every month: all months for the first state, then
# all months for the next, and so on.
states.dates <- data.frame(
  s = rep(state.abb, each = length(date.strings)),
  date.strings = rep(date.strings, times = length(state.abb)),
  stringsAsFactors = FALSE
)

# left outer join: keep every state/month pair, and drop count rows that have
# no counterpart in the grid (e.g. an NA YearMonth from an unparsed date).
all.sightings <- merge(states.dates,
                       sightings.counts,
                       by.x = c("s", "date.strings"),
                       by.y = c("USState", "YearMonth"),
                       all.x = TRUE)
names(all.sightings) <- c("State", "YearMonth", "Sightings")

# State/month pairs without any sighting come back as NA from the join.
all.sightings$Sightings[is.na(all.sightings$Sightings)] <- 0

# Map each "YYYY-MM" key back to its Date in date.range. Looking the key up
# directly avoids relying on the row order produced by merge().
all.sightings$YearMonth <- date.range[match(all.sightings$YearMonth,
                                            date.strings)]
all.sightings$State <- as.factor(all.sightings$State)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment