Last active
May 10, 2021 19:17
-
-
Save bjurban/ef74a5accf42c43480a1 to your computer and use it in GitHub Desktop.
Combine a directory of HOBO .csv files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Combine multiple raw HOBO data files into a single .csv file
# Author: Bryan Urban
# Email: burban@fraunhofer.org
# Date: 2015-04-30
## SETUP --------------------
# install these two packages first if you don't have them:
# install.packages("data.table")
# install.packages("lubridate")
library(data.table) | |
library(lubridate) | |
# Change these to match the folders containing the data.
raw_dir <- "D:/data/project/raw"  # folder that is scanned for raw .csv files
out_dir <- "D:/data/project"      # folder for the combined output

## LOAD RAW DATA ------------
# Regular expression selecting the files to read. The dot is escaped and the
# expression is anchored at the end, so only names ending in the literal
# extension ".csv" match (the earlier ".*csv$" also accepted names such as
# "notes_csv").
pattern <- "\\.csv$"
# Full paths of every matching file inside raw_dir.
fns <- list.files(raw_dir, pattern = pattern, full.names = TRUE)
# load data into lists | |
# Read one .csv file and tag every row with the file it came from.
#
# x    Path to a .csv file.
# ...  Further arguments forwarded unchanged to fread().
#
# Returns the table produced by fread() with an extra column `ids` holding
# the file name without its directory or extension.
read_and_label <- function(x, ...) {
  z <- fread(x, ...)
  # Derive the id with path helpers rather than a hand-written regex: the old
  # pattern fell through and returned the whole path for names containing
  # extra dots ("site.1.csv") and for paths with no directory ("site.csv").
  z$ids <- tools::file_path_sans_ext(basename(x))
  z
}
# Build a list with one entry per file, keeping only columns 2 and 3
# (timestamp and temperature). The first two lines of each file are skipped
# and no header row is read. A file that fails to load leaves a "try-error"
# object in its slot instead of aborting the whole run.
all_data <- lapply(
  fns,
  function(path) {
    try(read_and_label(path, select = 2:3, header = FALSE, skip = 2))
  }
)
## PROCESS RAW DATA ---------
# Keep only the entries that were read successfully. vapply() always yields a
# logical vector; sapply() returns an empty list when there are no files,
# which is not a valid subscript and fails with a cryptic error.
all_data <- all_data[vapply(all_data, is.data.table, logical(1))]
if (length(all_data) == 0L) {
  stop("no .csv files could be read; nothing to combine", call. = FALSE)
}
# Merge into one large data.table and name the columns.
all_data <- rbindlist(all_data)
setnames(all_data, c("ts", "temp", "ids"))
# Parse the month/day/year timestamp and round it down to the whole minute.
all_data[, ts := floor_date(mdy_hms(ts), "minute")]
# Drop rows with missing data.
all_data <- all_data[complete.cases(all_data), ]
## SAVE PROCESSED DATA ------
# Write the combined table to a single file, all_data.csv, inside out_dir.
write.csv(
  all_data,
  file = file.path(out_dir, "all_data.csv"),
  row.names = FALSE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Many thanks for sharing your code! It was very useful to me. I've adapted it to my purposes, and I'm keeping your author credits. 👍