Skip to content

Instantly share code, notes, and snippets.

@ozjimbob
Created June 20, 2020 09:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ozjimbob/6c1fb3cb7dfca70b9a0723614d1f3b71 to your computer and use it in GitHub Desktop.
Save ozjimbob/6c1fb3cb7dfca70b9a0723614d1f3b71 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(purrr)
# Build a small example list shaped like the real input.
# It has two entries. Each entry carries two "normal" fields
# (application, user) whose contents differ in length between entries,
# plus a third field, "custom", that wraps the staggered data frame
# (exactly one non-NA value per column, each on a different row).
df <- data.frame(
  a = c(1, NA, NA),
  b = c(NA, "hello", NA),
  c = c(NA, NA, 14.6)
)
data <- list(
  list(
    application = list("AppName"),
    user = list(c("somebody", "sometime")),
    custom = list(dimensions = df)
  ),
  list(
    application = list(c("something", "something else", "and again")),
    user = list("else"),
    custom = list(dimensions = df)
  )
)
# Print the structure of `data` so its nesting can be inspected
str(data)
# Utility: collapse a staggered data frame into a single row.
# For every column of `x` the first non-NA value is kept, so the result
# has one row and the same column names as `x`.
proc <- function(x) {
  # First value of a column once its NA entries have been dropped
  first_present <- function(column) {
    present <- na.omit(column)
    first(present)
  }
  collapsed <- summarise_all(x, list(first_present = first_present))
  # The summary may rename columns after the helper; restore the originals
  names(collapsed) <- names(x)
  collapsed
}
# Main function: flatten one list entry into a tibble.
#
# x: a named list of fields.
#    - Every field whose name is not "custom" is added to the output as a
#      column under its own name.
#    - The "custom" field is expected to hold a `dimensions` data frame; it
#      is collapsed to a single row with proc() and each of its columns is
#      added to the output under the column's name.
#    A later field or column with the same name overwrites an earlier one.
#
# Returns the tibble that as_tibble() builds from the collected columns.
# An empty `x` yields an empty tibble, and a `dimensions` frame with no
# columns simply contributes nothing.
proc_row <- function(x) {
  out <- list() # Collected output columns, keyed by name

  # seq_along() (not 1:length()) so an empty entry runs zero iterations
  for (i in seq_along(x)) {
    field <- x[i] # One-element list; keeps the field's name
    field_name <- names(field)
    field_data <- field[[1]]

    if (field_name != "custom") {
      # Single-bracket assignment unwraps a one-element list, so the
      # field's contents (not the wrapping list) become the column
      out[field_name] <- field_data
    } else {
      # Collapse the staggered data frame to ONE row, as a named list
      dims <- as.list(proc(field_data$dimensions))
      for (j in seq_along(dims)) {
        out[names(dims[j])] <- dims[[j]]
      }
    }
  }

  as_tibble(out) # Turn the output list into a little data frame
}
# Run everything: apply proc_row to each entry of the input list and
# combine the per-entry tibbles into a single data frame
final <- map_df(data, proc_row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment