uberscientist/logger_plotter.r

## logger_plotter.r
# Root directory that is searched (recursively) for the logger CSV files.
# Note that this does not change R's working directory.
# Set the LOGGER_DATA_DIR environment variable to point the script at another
# copy of the data (e.g. "F:/Kaitlyn_Toledo_Data/RMP_Project/SMData"); when it
# is unset or empty the original default below is used.
dir_SM <- Sys.getenv("LOGGER_DATA_DIR", unset = "")
if (!nzchar(dir_SM)) {
  dir_SM <- "/home/nak/kate-R/try1"
}

library(reshape2)
library(ggplot2)

# We can use the recursive option to get all the CSV files.
# `pattern` is a regular expression, not a shell glob: "*.csv" is not anchored
# to the end of the name (so e.g. "data.csv.bak" would also match). Match a
# literal ".csv" extension at the end of the file name instead.
csv_filenames <- list.files(dir_SM,
                            recursive = TRUE,
                            pattern = "\\.csv$",
                            full.names = TRUE)

# Initialize empty list to store dataframes from each logger
# After the for..in loop runs, access each logger with all_data[["Logger1"]]
all_data <- list()

# The files are read without a header, so read.csv() names the columns
# V1..V6; these names replace them so the columns match reality.
column_names <- c("DateTime", "Port1", "Port2", "Port3", "Port4", "Port5")

# Only rows strictly inside this window are kept. The bounds are converted
# once, in the session time zone (the same way the timestamps are parsed).
window_start <- as.POSIXct("2015-04-01")
window_end <- as.POSIXct("2015-08-01")

# Loop over all the CSV files. seq_along() (unlike 1:length()) runs zero
# times when no files were found.
for (i in seq_along(csv_filenames)) {
  csv_path <- csv_filenames[i]

  # Find the logger name from the file path. "+" keeps every digit so that
  # e.g. "Logger10" is not folded into "Logger1".
  logger_regex <- regexpr("Logger[[:digit:]]+", csv_path)
  logger_name <- regmatches(csv_path, logger_regex)

  # regmatches() returns character(0) when the path has no logger name, which
  # cannot be used as a list index; skip such files instead of crashing.
  if (length(logger_name) == 0) {
    warning("Skipping file with no logger name in its path: ", csv_path,
            call. = FALSE)
    next
  }

  # The first 3 lines of each file are skipped; column 1 is the timestamp
  # text and the remaining 5 columns are numeric port readings.
  data <- read.csv(csv_path,
                   skip = 3,
                   colClasses = c("character", rep("numeric", 5)),
                   na.strings = c("#N/A", "#DIV/0!"),
                   header = FALSE)

  # Re-name columns
  names(data) <- column_names

  # Timestamps come in two formats. A format that does not match yields NA
  # (not a warning), and an assignment made inside a tryCatch() handler only
  # changes a copy local to the handler, so the fallback has to be explicit:
  # parse with the 12-hour AM/PM format first, then re-parse whatever is
  # still NA with the 24-hour format.
  raw_stamps <- data$DateTime
  parsed_stamps <- as.POSIXct(raw_stamps, format = "%m/%d/%Y %I:%M %p")
  unparsed <- is.na(parsed_stamps)
  if (any(unparsed)) {
    parsed_stamps[unparsed] <- as.POSIXct(raw_stamps[unparsed],
                                          format = "%m/%d/%Y %H:%M")
  }
  data$DateTime <- parsed_stamps

  # Only get a subset of dates (rows whose timestamp is NA are dropped too,
  # because subset() treats an NA condition as FALSE)
  data <- subset(data,
                 DateTime > window_start &
                 DateTime < window_end)

  # Bind the rows of this file with previous files loaded into the list
  # (the first file of a logger binds onto NULL, which just yields `data`)
  all_data[[logger_name]] <- rbind(all_data[[logger_name]], data)
}

# Now that we've loaded all the CSV data into a list of data frames (all_data)
# we can loop over each logger data frame object and then plot it
for (logger_name in names(all_data)) {
  data <- all_data[[logger_name]]

  # Every column except DateTime holds the readings of one port
  port_columns <- setdiff(names(data), "DateTime")

  # Keep only ports that have at least one non-NA reading. Checking the port
  # columns by name (instead of indexing with data[, mask]) means a single
  # surviving column cannot collapse the data frame into a plain vector.
  has_readings <- vapply(data[port_columns],
                         function(column) any(!is.na(column)),
                         logical(1))
  live_ports <- port_columns[has_readings]

  # A logger with no rows in the date window, or with only NA readings,
  # has nothing to plot
  if (nrow(data) == 0 || length(live_ports) == 0) {
    message("Skipping ", logger_name, ": no readings to plot")
    next
  }

  # Melt takes a wide data format and makes it into a column instead:
  # one row per (DateTime, port) pair, with the port name in `variable` and
  # the reading in `value`. This is needed for ggplot2
  data <- melt(data[, c("DateTime", live_ports), drop = FALSE],
               id.vars = "DateTime",
               measure.vars = live_ports,
               na.rm = TRUE)

  # Keep only strictly positive readings (zero and negative values are dropped)
  data <- subset(data, value > 0)

  # Faceting needs at least one row, so skip loggers left with no data
  if (nrow(data) == 0) {
    message("Skipping ", logger_name, ": no positive readings to plot")
    next
  }

  # Print is needed to show ggplots in a loop, that's it!
  # One facet row per port; the title tells the per-logger plots apart.
  print(ggplot(data, aes(x = DateTime, y = value)) +
    geom_line() +
    facet_grid(variable ~ .) +
    ggtitle(logger_name))
}
	# NOTE(review): from this line to the end, the file appears to repeat the
	# script that starts at the top of the file (same statements, tab-indented).
	# It looks like an accidental paste; if so, running the file loads and plots
	# everything twice -- confirm and consider removing this second copy.
	#
	# Root directory that is searched (recursively) for the logger CSV files.
	# Note that this does not change R's working directory.
	# Set the LOGGER_DATA_DIR environment variable to point the script at another
	# copy of the data (e.g. "F:/Kaitlyn_Toledo_Data/RMP_Project/SMData"); when it
	# is unset or empty the original default below is used.
	dir_SM <- Sys.getenv("LOGGER_DATA_DIR", unset = "")
	if (!nzchar(dir_SM)) {
	  dir_SM <- "/home/nak/kate-R/try1"
	}

	library(reshape2)
	library(ggplot2)

	# We can use the recursive option to get all the CSV files.
	# `pattern` is a regular expression, not a shell glob: "*.csv" is not anchored
	# to the end of the name (so e.g. "data.csv.bak" would also match). Match a
	# literal ".csv" extension at the end of the file name instead.
	csv_filenames <- list.files(dir_SM,
	                            recursive = TRUE,
	                            pattern = "\\.csv$",
	                            full.names = TRUE)

	# Initialize empty list to store dataframes from each logger
	# After the for..in loop runs, access each logger with all_data[["Logger1"]]
	all_data <- list()

	# The files are read without a header, so read.csv() names the columns
	# V1..V6; these names replace them so the columns match reality.
	column_names <- c("DateTime", "Port1", "Port2", "Port3", "Port4", "Port5")

	# Only rows strictly inside this window are kept. The bounds are converted
	# once, in the session time zone (the same way the timestamps are parsed).
	window_start <- as.POSIXct("2015-04-01")
	window_end <- as.POSIXct("2015-08-01")

	# Loop over all the CSV files. seq_along() (unlike 1:length()) runs zero
	# times when no files were found.
	for (i in seq_along(csv_filenames)) {
	  csv_path <- csv_filenames[i]

	  # Find the logger name from the file path. "+" keeps every digit so that
	  # e.g. "Logger10" is not folded into "Logger1".
	  logger_regex <- regexpr("Logger[[:digit:]]+", csv_path)
	  logger_name <- regmatches(csv_path, logger_regex)

	  # regmatches() returns character(0) when the path has no logger name, which
	  # cannot be used as a list index; skip such files instead of crashing.
	  if (length(logger_name) == 0) {
	    warning("Skipping file with no logger name in its path: ", csv_path,
	            call. = FALSE)
	    next
	  }

	  # The first 3 lines of each file are skipped; column 1 is the timestamp
	  # text and the remaining 5 columns are numeric port readings.
	  data <- read.csv(csv_path,
	                   skip = 3,
	                   colClasses = c("character", rep("numeric", 5)),
	                   na.strings = c("#N/A", "#DIV/0!"),
	                   header = FALSE)

	  # Re-name columns
	  names(data) <- column_names

	  # Timestamps come in two formats. A format that does not match yields NA
	  # (not a warning), and an assignment made inside a tryCatch() handler only
	  # changes a copy local to the handler, so the fallback has to be explicit:
	  # parse with the 12-hour AM/PM format first, then re-parse whatever is
	  # still NA with the 24-hour format.
	  raw_stamps <- data$DateTime
	  parsed_stamps <- as.POSIXct(raw_stamps, format = "%m/%d/%Y %I:%M %p")
	  unparsed <- is.na(parsed_stamps)
	  if (any(unparsed)) {
	    parsed_stamps[unparsed] <- as.POSIXct(raw_stamps[unparsed],
	                                          format = "%m/%d/%Y %H:%M")
	  }
	  data$DateTime <- parsed_stamps

	  # Only get a subset of dates (rows whose timestamp is NA are dropped too,
	  # because subset() treats an NA condition as FALSE)
	  data <- subset(data,
	                 DateTime > window_start &
	                 DateTime < window_end)

	  # Bind the rows of this file with previous files loaded into the list
	  # (the first file of a logger binds onto NULL, which just yields `data`)
	  all_data[[logger_name]] <- rbind(all_data[[logger_name]], data)
	}

	# Now that we've loaded all the CSV data into a list of data frames (all_data)
	# we can loop over each logger data frame object and then plot it
	for (logger_name in names(all_data)) {
	  data <- all_data[[logger_name]]

	  # Every column except DateTime holds the readings of one port
	  port_columns <- setdiff(names(data), "DateTime")

	  # Keep only ports that have at least one non-NA reading. Checking the port
	  # columns by name (instead of indexing with data[, mask]) means a single
	  # surviving column cannot collapse the data frame into a plain vector.
	  has_readings <- vapply(data[port_columns],
	                         function(column) any(!is.na(column)),
	                         logical(1))
	  live_ports <- port_columns[has_readings]

	  # A logger with no rows in the date window, or with only NA readings,
	  # has nothing to plot
	  if (nrow(data) == 0 || length(live_ports) == 0) {
	    message("Skipping ", logger_name, ": no readings to plot")
	    next
	  }

	  # Melt takes a wide data format and makes it into a column instead:
	  # one row per (DateTime, port) pair, with the port name in `variable` and
	  # the reading in `value`. This is needed for ggplot2
	  data <- melt(data[, c("DateTime", live_ports), drop = FALSE],
	               id.vars = "DateTime",
	               measure.vars = live_ports,
	               na.rm = TRUE)

	  # Keep only strictly positive readings (zero and negative values are dropped)
	  data <- subset(data, value > 0)

	  # Faceting needs at least one row, so skip loggers left with no data
	  if (nrow(data) == 0) {
	    message("Skipping ", logger_name, ": no positive readings to plot")
	    next
	  }

	  # Print is needed to show ggplots in a loop, that's it!
	  # One facet row per port; the title tells the per-logger plots apart.
	  print(ggplot(data, aes(x = DateTime, y = value)) +
	    geom_line() +
	    facet_grid(variable ~ .) +
	    ggtitle(logger_name))
	}