Skip to content

Instantly share code, notes, and snippets.

@statcompute
Created November 24, 2018 06:31
Show Gist options
  • Save statcompute/128b10afa6e1b911d9ba03d254d00413 to your computer and use it in GitHub Desktop.
Save statcompute/128b10afa6e1b911d9ba03d254d00413 to your computer and use it in GitHub Desktop.
Converting data frame to dictionary list
### AS.LIST() FUNCTION IN BASE PACKAGE ###
# Convert the first row of iris into a plain named list; the column names
# become the list's element names and act as the dictionary keys.
x1 <- as.list(iris[1, ])
# Show the available keys.
names(x1)
# [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
# Look up one value by its key.
x1[["Sepal.Length"]]
# [1] 5.1
### ENVIRONMENT-BASED SOLUTION ###
#' Build an environment-backed dictionary from a data frame row or named list.
#'
#' Each name in `names(x)` becomes a binding in a freshly created hashed
#' environment; the bound value is the element (column) of `x` with that name.
#'
#' @param x A data frame (typically a single row) or a named list.
#' @return A new environment holding one binding per name in `x`.
envn_dict <- function(x) {
  e <- new.env(hash = TRUE)
  # `[[` extracts by name from data frames and plain lists alike, whereas
  # `x[, name]` needs two dimensions and therefore fails on a list.
  for (name in names(x)) {
    assign(name, x[[name]], envir = e)
  }
  e
}
# Store the first row of iris in an environment, one binding per column.
x2 <- envn_dict(iris[1, ])
# List the names bound in the environment (the dictionary keys).
ls(x2)
# [1] "Petal.Length" "Petal.Width" "Sepal.Length" "Sepal.Width" "Species"
# Look up one value by its key.
x2[["Sepal.Length"]]
# [1] 5.1
### COLLECTIONS PACKAGE ###
#' Build a `collections::Dict` from a data frame row or named list.
#'
#' Creates a dictionary with `collections::Dict$new()` and calls its `set()`
#' method once per name in `names(x)`, storing the element of `x` that has
#' that name.
#'
#' @param x A data frame (typically a single row) or a named list.
#' @return The dictionary object created by `collections::Dict$new()`.
coll_dict <- function(x) {
  d <- collections::Dict$new()
  # `[[` extracts by name from data frames and plain lists alike, whereas
  # `x[, name]` needs two dimensions and therefore fails on a list.
  for (name in names(x)) {
    d$set(name, x[[name]])
  }
  d
}
# Store the first row of iris in a collections dictionary.
x3 <- coll_dict(iris[1, ])
# Show the keys held by the dictionary.
x3$keys()
# [1] "Petal.Length" "Petal.Width" "Sepal.Length" "Sepal.Width" "Species"
# Look up one value by its key.
x3$get("Sepal.Length")
# [1] 5.1
### HASH PACKAGE ###
#' Build a `hash::hash` object from a data frame row or named list.
#'
#' Creates an empty hash with `hash::hash()` and assigns into it with `[[<-`
#' once per name in `names(x)`, storing the element of `x` that has that name.
#'
#' @param x A data frame (typically a single row) or a named list.
#' @return The hash object created by `hash::hash()`.
hash_dict <- function(x) {
  d <- hash::hash()
  # `[[` extracts by name from data frames and plain lists alike, whereas
  # `x[, name]` needs two dimensions and therefore fails on a list.
  for (name in names(x)) {
    d[[name]] <- x[[name]]
  }
  d
}
# Store the first row of iris in a hash object.
x4 <- hash_dict(iris[1, ])
# Show the keys held by the hash.
hash::keys(x4)
# [1] "Petal.Length" "Petal.Width" "Sepal.Length" "Sepal.Width" "Species"
# Look up one value by its key; the printed result carries the key as a name.
hash::values(x4, "Sepal.Length")
# Sepal.Length
# 5.1
### DATASTRUCTURES PACKAGE ###
#' Build a `datastructures::hashmap` from a data frame row or named list.
#'
#' Creates a hashmap with `datastructures::hashmap()` and assigns into it with
#' `[<-` once per name in `names(x)`, storing the element of `x` that has that
#' name.
#'
#' @param x A data frame (typically a single row) or a named list.
#'   NOTE(review): which value types the hashmap accepts is decided by
#'   `datastructures`, not by this function -- confirm for non-atomic values.
#' @return The hashmap object created by `datastructures::hashmap()`.
data_dict <- function(x) {
  d <- datastructures::hashmap()
  # `[[` extracts by name from data frames and plain lists alike, whereas
  # `x[, name]` needs two dimensions and therefore fails on a list.
  for (name in names(x)) {
    d[name] <- x[[name]]
  }
  d
}
# Store the first row of iris in a datastructures hashmap.
x5 <- data_dict(iris[1, ])
# Show the keys held by the hashmap (the recorded order below differs from
# the column order of iris).
datastructures::keys(x5)
# [1] "Species" "Sepal.Width" "Petal.Length" "Sepal.Length" "Petal.Width"
# Look up one value by its key.
datastructures::get(x5, "Sepal.Length")
# [1] 5.1
### FROM PYTHON ###
#' Build a Python dictionary from a data frame row via reticulate.
#'
#' The names of `x` are passed as the keys and `x` itself as the values to
#' `reticulate::py_dict()`, with its `convert` argument set to `TRUE`.
#'
#' @param x A data frame (typically a single row).
#' @return The object returned by `reticulate::py_dict()`.
py2r_dict <- function(x) {
  dict_keys <- names(x)
  reticulate::py_dict(keys = dict_keys, values = x, convert = TRUE)
}
# Store the first row of iris in a Python dictionary created through reticulate.
x6 <- py2r_dict(iris[1, ])
# Show the keys held by the dictionary.
x6$keys()
# [1] "Petal.Length" "Sepal.Length" "Petal.Width" "Sepal.Width" "Species"
# Look up one value by its key.
x6["Sepal.Length"]
# [1] 5.1
### CONVERT DATAFRAME TO DICTIONARY LIST ###
#' Convert every row of a data frame with `fn` and collect the results.
#'
#' @param df A data frame.
#' @param fn A function called once per row; it receives that row as a
#'   one-row data frame.
#' @return An unnamed list with exactly `nrow(df)` elements, the i-th being
#'   `fn` applied to row i. A zero-row `df` gives an empty list.
to_list <- function(df, fn) {
  # seq_len() gives an empty sequence for zero rows, where seq(0) would
  # yield c(1, 0) and index rows that do not exist.
  # drop = FALSE keeps a one-column row as a data frame so `fn` still sees
  # the column name instead of a bare vector.
  lapply(seq_len(nrow(df)), function(i) fn(df[i, , drop = FALSE]))
}
# Benchmark the six approaches: each test converts all rows of iris with
# to_list() and is replicated 100 times. Results are sorted by elapsed time,
# and the `relative` column is derived from the elapsed time (the fastest
# test shows 1.000 in the recorded output below).
rbenchmark::benchmark(replications = 100, order = "elapsed", relative = "elapsed",
columns = c("test", "replications", "elapsed", "relative", "user.self", "sys.self"),
"BASE::LIST" = to_list(iris, as.list),
"BASE::ENVIRONMENT" = to_list(iris, envn_dict),
"COLLECTIONS::DICT" = to_list(iris, coll_dict),
"HASH::HASH" = to_list(iris, hash_dict),
"DATASTRUCTURES::HASHMAP" = to_list(iris, data_dict),
"RETICULATE::PY_DICT" = to_list(iris, py2r_dict)
)
# Recorded output from the author's run. The leading row labels follow the
# order in which the tests were supplied, so #4 is listed before #3 after
# sorting by elapsed time.
# test replications elapsed relative user.self sys.self
#1 BASE::LIST 100 0.857 1.000 0.857 0.000
#2 BASE::ENVIRONMENT 100 1.607 1.875 1.607 0.000
#4 HASH::HASH 100 2.600 3.034 2.600 0.000
#3 COLLECTIONS::DICT 100 2.956 3.449 2.956 0.000
#5 DATASTRUCTURES::HASHMAP 100 16.070 18.751 16.071 0.000
#6 RETICULATE::PY_DICT 100 18.030 21.039 18.023 0.008
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment