Skip to content

Instantly share code, notes, and snippets.

@emraher
Created December 3, 2014 06:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save emraher/3dd3b40bc5ed50bd9503 to your computer and use it in GitHub Desktop.
Save emraher/3dd3b40bc5ed50bd9503 to your computer and use it in GitHub Desktop.
# All relative paths used later in this script (the CSV listing and the
# "../Rdata" output folder) are resolved from this directory.
setwd("~/Dropbox/Tourism/csv")
# Remove every object from the current workspace before processing starts.
rm(list = ls())
# Libraries ====================================================================
library("plyr")
library("reshape2")
library("zoo")
# Functions ====================================================================
# Delete incomplete cases
# http://stackoverflow.com/a/11258247
# Keep only the rows of `data` that have no missing value in `desiredCols`.
#
# Args:
#   data:        a data frame to filter.
#   desiredCols: column name(s) or index(es) that must be non-NA.
#
# Returns:
#   `data` with all of its columns, restricted to the rows where every one of
#   the checked columns is present.
completeFun <- function(data, desiredCols) {
  checked_columns <- data[, desiredCols]
  row_is_complete <- complete.cases(checked_columns)
  data[row_is_complete, ]
}
# http://stackoverflow.com/a/3611619
# Coercion used through read.csv(colClasses = "num.with.dots"): parses numbers
# written with "." as thousands separator (e.g. "1.234.567" -> 1234567).
# The target class is declared first so setAs() does not complain that
# "num.with.dots" has no definition.
setClass("num.with.dots")
setAs("character", "num.with.dots",
      function(from) {
        # Strip every literal dot, then convert what is left to numeric.
        as.numeric(gsub(".", "", from, fixed = TRUE))
      })
# Start Convert ================================================================
# Convert every yearly CSV in the working directory into a long-format table
# (Mode, City, Gate, Value, Date) and save it as an RDS file under ../Rdata.
# Only *.csv entries are listed, so other files or sub-directories in the
# folder are never handed to read.csv().
file_list <- list.files(pattern = "\\.csv$")
for (csv_file in file_list) {
  # The file name without ".csv" is used both as the year and in the output
  # name, e.g. "2014.csv" -> 2014.
  file_stem <- sub("\\.csv$", "", csv_file)
  year <- as.numeric(file_stem)

  # The first 10 lines are skipped; 3 text columns are followed by 13 numeric
  # columns that use "." as thousands separator.
  # NOTE(review): read.table documents "UTF-8" / "latin1" for `encoding`;
  # confirm the lower-case spelling is honoured.
  arrivals <- read.csv(csv_file,
                       header = TRUE,
                       encoding = "utf-8",
                       na.strings = "",
                       stringsAsFactors = FALSE,
                       skip = 10,
                       colClasses = c(rep("character", 3),
                                      rep("num.with.dots", 13)))

  # Keep rows that name a gate, then drop the province and general subtotals.
  arrivals <- completeFun(arrivals, "Arriving.gate.by.city")
  subtotal_labels <- c("Province total", "General total")
  arrivals <- arrivals[!(arrivals$Arriving.gate.by.city %in% subtotal_labels), ]

  # Drop irrelevant columns
  arrivals <- arrivals[, !(names(arrivals) %in% c("Province.name", "Total"))]

  # Replace NAs with zeros. This applies to every remaining column, including
  # the character ones.
  arrivals[is.na(arrivals)] <- 0

  # Split "City, Gate" into two columns (split once, reuse both pieces).
  gate_parts <- ldply(strsplit(arrivals$Arriving.gate.by.city, ","))
  arrivals$City <- gate_parts[[1]]
  arrivals$Gate <- gate_parts[[2]]
  arrivals$Arriving.gate.by.city <- NULL

  # The first remaining column holds the mode.
  colnames(arrivals)[1] <- "Mode"

  # Wide -> long: the former column names end up in Month, the cells in Value.
  arrivals <- melt(arrivals,
                   id.vars = c("Mode", "City", "Gate"),
                   variable.name = "Month",
                   value.name = "Value")

  # Convert type ===============================================================
  arrivals$Mode <- as.factor(arrivals$Mode)
  # All whitespace (including inner spaces) is removed from City and Gate.
  arrivals$City <- as.factor(gsub("\\s", "", arrivals$City))
  arrivals$Gate <- as.factor(gsub("\\s", "", arrivals$Gate))

  # Create date variable =======================================================
  # First day of each month. "%B" parses month names in the session locale.
  # NOTE(review): assumes the melted column names are month names readable in
  # the current locale; otherwise Date becomes NA - confirm.
  arrivals$Date <- as.Date(paste("01", arrivals$Month, year, sep = "-"),
                           "%d-%B-%Y")
  arrivals$Month <- NULL

  # Save
  # NOTE(review): the established output name has an underscore right before
  # ".rds" (e.g. "data_2014_.rds"). It is kept as-is because other scripts may
  # read these files; rename only together with its readers.
  saveRDS(arrivals, file = paste0("../Rdata/data_", file_stem, "_.rds"))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment