Last active
September 26, 2015 08:41
-
-
Save uberscientist/3b69d1ac60dfcd3bad38 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Root directory that is searched (recursively) for the logger CSV files.
# Note: this does not change R's working directory; it is only used as the
# search root for list.files() below.
#dir_SM <- "F:/Kaitlyn_Toledo_Data/RMP_Project/SMData"
dir_SM <- "/home/nak/kate-R/try1"

library(reshape2)
library(ggplot2)

# We can use the recursive option to get all the CSV files below dir_SM.
# `pattern` is a regular expression, not a shell glob: the old "*.csv" was
# unanchored and its "." matched any character, so it could also pick up
# names such as "data.csv.bak". Match a literal ".csv" extension instead.
csv_filenames <- list.files(dir_SM,
                            recursive = TRUE,
                            pattern = "\\.csv$",
                            full.names = TRUE)

# Initialize empty list to store data frames from each logger.
# After the loading loop runs, access each logger with all_data[["Logger1"]]
all_data <- list()
# Column names for every logger file: the first column is the timestamp and
# the remaining five are the port readings. Defined once, outside the loop.
column_names <- c("DateTime", "Port1", "Port2", "Port3", "Port4", "Port5")

# Only rows strictly inside this window are kept. The bounds are parsed in
# the session's local time zone, exactly as the comparison against the
# character strings "2015-04-01" / "2015-08-01" did before.
window_start <- as.POSIXct("2015-04-01")
window_end <- as.POSIXct("2015-08-01")

# Loop over all the CSV files and append each file's rows to the data frame
# of the logger it belongs to. Iterating over the vector itself (instead of
# 1:length(...)) is also safe when no CSV files were found.
for (csv_file in csv_filenames) {
  # Find the logger name from the file path. "+" captures every digit, so
  # "Logger12" is not truncated to "Logger1" and merged with another logger.
  logger_match <- regexpr("Logger[[:digit:]]+", csv_file)
  if (logger_match == -1) {
    # Without a logger name there is no list slot to store the data in.
    warning("Skipping file without a 'Logger<N>' name in its path: ",
            csv_file, call. = FALSE)
    next
  }
  logger_name <- regmatches(csv_file, logger_match)

  # The first three lines of each file are skipped; spreadsheet error markers
  # are read as NA.
  data <- read.csv(csv_file,
                   skip = 3,
                   colClasses = c("character", rep("numeric", 5)),
                   na.strings = c("#N/A", "#DIV/0!"),
                   header = FALSE)

  # read.csv names the columns V1..V6; re-name them to match reality.
  names(data) <- column_names

  # Parse the timestamps. as.POSIXct() does not warn when a string does not
  # match the format, it silently returns NA, so a tryCatch(warning = ...)
  # fallback never fires. Instead, try the 12-hour AM/PM format first and
  # re-parse only the entries that failed using the 24-hour format.
  raw_stamp <- data$DateTime
  parsed <- as.POSIXct(raw_stamp, format = "%m/%d/%Y %I:%M %p")
  needs_24h <- is.na(parsed) & !is.na(raw_stamp)
  if (any(needs_24h)) {
    parsed[needs_24h] <- as.POSIXct(raw_stamp[needs_24h],
                                    format = "%m/%d/%Y %H:%M")
  }
  data$DateTime <- parsed

  # Only keep the subset of dates inside the window; rows whose timestamp
  # could not be parsed at all (NA) are dropped as well.
  in_window <- !is.na(data$DateTime) &
    data$DateTime > window_start &
    data$DateTime < window_end
  data <- data[in_window, , drop = FALSE]

  # Bind the rows of this file with previous files loaded into the list
  # (rbind(NULL, data) simply yields data for the first file of a logger).
  all_data[[logger_name]] <- rbind(all_data[[logger_name]], data)
}
# Now that we've loaded all the CSV data into a list of data frames (all_data)
# we can loop over each logger data frame object and then plot it:
# one figure per logger, one facet row per port.
for (logger_name in names(all_data)) {
  data <- all_data[[logger_name]]

  # Remove columns that are completely NA. drop = FALSE keeps the result a
  # data frame even when only a single column survives.
  has_values <- colSums(is.na(data)) != nrow(data)
  data <- data[, has_values, drop = FALSE]

  # Every remaining column except the timestamp is a port to plot.
  port_columns <- setdiff(names(data), "DateTime")

  # Nothing to plot if the logger has no rows, lost its timestamp column, or
  # has no port with data; skip it instead of failing inside melt()/ggplot().
  if (nrow(data) == 0 ||
      !("DateTime" %in% names(data)) ||
      length(port_columns) == 0) {
    warning("No data to plot for ", logger_name, call. = FALSE)
    next
  }

  # Melt turns the wide format (one column per port) into long format with
  # a `variable` (port name) and a `value` column. This is needed for ggplot2.
  data <- melt(data,
               id.vars = "DateTime",
               measure.vars = port_columns,
               na.rm = TRUE)

  # Keep only strictly positive readings (zero and negative values are
  # removed).
  data <- data[data$value > 0, , drop = FALSE]
  if (nrow(data) == 0) {
    warning("No positive readings to plot for ", logger_name, call. = FALSE)
    next
  }

  # Print is needed to show ggplots in a loop, that's it!
  logger_plot <- ggplot(data, aes(x = DateTime, y = value)) +
    geom_line() +
    facet_grid(variable ~ .)
  print(logger_plot)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment