Skip to content

Instantly share code, notes, and snippets.

@svmiller
Last active February 19, 2020 16:07
Show Gist options
  • Save svmiller/b3322b027ba20c05d9e2ba72581c16be to your computer and use it in GitHub Desktop.
Save svmiller/b3322b027ba20c05d9e2ba72581c16be to your computer and use it in GitHub Desktop.
Make/Test Various GSS Formats
# Every relative file path used below is resolved against this directory.
# NOTE(review): this is a machine-specific path -- adjust before running elsewhere.
setwd("~/Dropbox/data/gss/GSS_spss-2018")
# Attach the tidyverse; the script relies on its pipe, mutate(), bind_rows(),
# rename_all() and write_csv().
library(tidyverse)
# haven::read_sav method -----
# ----------------------------
# Time ten repeated reads of the original SPSS file. Each run's system.time()
# result is reshaped into a one-row data frame; the rows are collected in a
# preallocated list and bound once after the loop, rather than growing a data
# frame with rbind() on every iteration.
n_runs <- 10L
run_times <- vector("list", n_runs)
for (i in seq_len(n_runs)) {
  # The assignment inside system.time() is evaluated at top level, so GSS7218
  # remains available to the rest of the script once the loop has finished.
  main_call <- system.time(GSS7218 <- haven::read_sav("GSS7218_R1.sav"))
  times <- as.data.frame(t(data.matrix(main_call)))
  run_times[[i]] <- times
  print(times)
}
# Label every timing row with the reader that produced it.
readsav_times <- bind_rows(run_times) %>%
  mutate(Method = "haven::read_sav")
# Lower-case every column name before writing the derived files.
GSS7218 <- rename_all(GSS7218, tolower)

# Write the same data in three formats so the benchmarks below can compare them.
saveRDS(GSS7218, file = "gss7218.rds")
library(qs)
qsave(GSS7218, "gss7218.qs")
library(fst)
write_fst(GSS7218, "gss7218.fst")

# Format a file's on-disk size as "<size> MB" (bytes / 1e6, two decimals).
size_in_mb <- function(path) {
  paste(round(file.size(path) / 1e6, 2), "MB")
}
size_in_mb("gss7218.qs")
size_in_mb("gss7218.rds")
# readRDS method -----
# --------------------
# Time ten repeated reads of gss7218.rds. Per-run timings are stored in a
# preallocated list and bound once after the loop, rather than growing a data
# frame with rbind() on every iteration.
n_runs <- 10L
run_times <- vector("list", n_runs)
for (i in seq_len(n_runs)) {
  # Each run overwrites GSS7218 at top level with the freshly read object.
  main_call <- system.time(GSS7218 <- readRDS("gss7218.rds"))
  times <- as.data.frame(t(data.matrix(main_call)))
  run_times[[i]] <- times
  print(times)
}
# Label every timing row with the reader that produced it.
readrds_times <- bind_rows(run_times) %>%
  mutate(Method = "base::readRDS")
# qs method -----
# ---------------
# Time ten repeated reads of gss7218.qs. Per-run timings are stored in a
# preallocated list and bound once after the loop, rather than growing a data
# frame with rbind() on every iteration.
n_runs <- 10L
run_times <- vector("list", n_runs)
for (i in seq_len(n_runs)) {
  # Each run overwrites GSS7218 at top level with the freshly read object.
  main_call <- system.time(GSS7218 <- qread("gss7218.qs"))
  times <- as.data.frame(t(data.matrix(main_call)))
  run_times[[i]] <- times
  print(times)
}
# Label every timing row with the reader that produced it.
qread_times <- bind_rows(run_times) %>%
  mutate(Method = "qs::qread")
# fst method ----
# ---------------
# Time ten repeated reads of gss7218.fst. Per-run timings are stored in a
# preallocated list and bound once after the loop, rather than growing a data
# frame with rbind() on every iteration.
n_runs <- 10L
run_times <- vector("list", n_runs)
for (i in seq_len(n_runs)) {
  # Each run overwrites GSS7218 at top level with the freshly read object.
  main_call <- system.time(GSS7218 <- read_fst("gss7218.fst"))
  times <- as.data.frame(t(data.matrix(main_call)))
  run_times[[i]] <- times
  print(times)
}
# Label every timing row with the reader that produced it.
readfst_times <- bind_rows(run_times) %>%
  mutate(Method = "fst::read_fst")
# Stack the four read benchmarks into a single table of read timings.
readtimes <- bind_rows(
  readsav_times,
  readrds_times,
  qread_times,
  readfst_times
)
# saveRDS method -----
# --------------------
# Time ten repeated writes of GSS7218 to gss7218.rds (the file is overwritten
# on every run). Per-run timings are stored in a preallocated list and bound
# once after the loop, rather than growing a data frame with rbind().
# NOTE(review): GSS7218 here is whatever the most recent assignment left in the
# workspace; when the script is run top to bottom that is the result of the
# last read benchmark -- confirm this is the object meant to be written.
n_runs <- 10L
run_times <- vector("list", n_runs)
for (i in seq_len(n_runs)) {
  main_call <- system.time(saveRDS(GSS7218, file = "gss7218.rds"))
  times <- as.data.frame(t(data.matrix(main_call)))
  run_times[[i]] <- times
  print(times)
}
# Label every timing row with the writer that produced it.
saverds_times <- bind_rows(run_times) %>%
  mutate(Method = "base::saveRDS")
# qs method -----
# ---------------
# Time ten repeated writes of GSS7218 to gss7218.qs (the file is overwritten
# on every run). Per-run timings are stored in a preallocated list and bound
# once after the loop, rather than growing a data frame with rbind().
n_runs <- 10L
run_times <- vector("list", n_runs)
for (i in seq_len(n_runs)) {
  main_call <- system.time(qsave(GSS7218, "gss7218.qs"))
  times <- as.data.frame(t(data.matrix(main_call)))
  run_times[[i]] <- times
  print(times)
}
# Label every timing row with the writer that produced it.
qsave_times <- bind_rows(run_times) %>%
  mutate(Method = "qs::qsave")
# fst method ----
# ---------------
# Time ten repeated writes of GSS7218 to gss7218.fst (the file is overwritten
# on every run). Per-run timings are stored in a preallocated list and bound
# once after the loop, rather than growing a data frame with rbind().
n_runs <- 10L
run_times <- vector("list", n_runs)
for (i in seq_len(n_runs)) {
  main_call <- system.time(write_fst(GSS7218, "gss7218.fst"))
  times <- as.data.frame(t(data.matrix(main_call)))
  run_times[[i]] <- times
  print(times)
}
# Label every timing row with the writer that produced it.
writefst_times <- bind_rows(run_times) %>%
  mutate(Method = "fst::write_fst")
# Combine all benchmark results into one table, labelling each half with its
# category, and export the result as CSV.
read_rows <- bind_rows(
  readrds_times,
  qread_times,
  readsav_times,
  readfst_times
) %>%
  mutate(Category = "Read Times")

save_rows <- bind_rows(saverds_times, writefst_times, qsave_times) %>%
  mutate(Category = "Save Times")

# Read rows come first, then save rows.
gsstimes <- bind_rows(read_rows, save_rows)
write_csv(gsstimes, "gsstimes.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment