Skip to content

Instantly share code, notes, and snippets.

@ozjimbob
Created June 20, 2020 23:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ozjimbob/863a0f553b89e695f524e735cc4b340f to your computer and use it in GitHub Desktop.
Save ozjimbob/863a0f553b89e695f524e735cc4b340f to your computer and use it in GitHub Desktop.
library(tidyverse)
library(purrr)
library(readxl)
library(jsonlite)
# Read your spreadsheet
input <- read_excel("Book1-2.xlsx")
# Take your column C and put it into a list, by interpreting the JSON
data <- map(input$context,fromJSON)
# As far as I can tell, this 'data' now matches what you have
# This is a utility function that turns that weird 23x23 data frame
# into a single row with containing all the data
# with no NA values
proc <- function(x){
frst <- function(x){first(na.omit(x))}
y <- x %>% summarise_all(lst(frst))
names(y)<- names(x)
y
}
# This is the main function
# This takes one list entry and looks at all the fields
# If it's a "normal" field, it adds it as a column to the output
# If it is that nested data frame, then it takes it apart using the
# "proc" function above and then adds all the columns to the output
proc_row <- function(x){
out = list() #
for(i in 1:length(x)){
this_name = names(x[i])
if(this_name != "custom"){
full_contents = x[[i]]
for(j in 1:length(full_contents)){
sub_name = names(full_contents[j])
sub_data = full_contents[[j]]
complete_name = paste0(this_name,"_",sub_name)
if(length(sub_data)==0){
sub_data=NA
}
out[complete_name]=sub_data
}
}else{
this_df <- x[[i]]$dimensions # Get the contents
this_df <- as.list(proc(this_df)) # Take it apart so it's just ONE row and not 23
for(j in 1:length(this_df)){ # For each column
this_name2 <- paste0("custom_",names(this_df[j]))
this_data2 <- this_df[[j]]
if(length(this_data2)==0){
this_data2=NA
}
out[this_name2]<- this_data2 # Add it to our output
}
}
}
as_tibble(out) # Turn our output list into a little data frame
}
# And here we run it all - apply that "proc_row" function over the input list
final <- data %>% map_df(proc_row)
print(final)
# NOTE;
# The input data has fields and subfields - the column names are written as field_subfield to enable this to work
# If data is missing anything in a field, then it stores NA in the output
# All this only works if every row of the input has the same total number of fields
# If a given entry has a different number of fields, it will fail - but that is to be expected, because
# you can't store things in a table if they have differnet numbers of columns
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment