ozjimbob/ProcessList2

## ProcessList2
library(tidyverse)
library(purrr)
library(readxl)
library(jsonlite)

# Read your spreadsheet
input <- read_excel("Book1-2.xlsx")

# Take your column C and put it into a list, by interpreting the JSON
data <- map(input$context,fromJSON)

# As far as I can tell, this 'data' now matches what you have


# This is a utility function that turns that weird 23x23 data frame
# into a single row with containing all the data
# with no NA values
proc <- function(x){
  frst <- function(x){first(na.omit(x))}
  y <- x %>% summarise_all(lst(frst))
  names(y)<- names(x)
  y
}

# This is the main function
# This takes one list entry and looks at all the fields
# If it's a "normal" field, it adds it as a column to the output
# If it is that nested data frame, then it takes it apart using the
# "proc" function above and then adds all the columns to the output

proc_row <- function(x){
  out = list() #
  for(i in 1:length(x)){
    this_name = names(x[i])

    if(this_name != "custom"){
      full_contents = x[[i]]
      for(j in 1:length(full_contents)){
        sub_name = names(full_contents[j])
        sub_data = full_contents[[j]]
        complete_name = paste0(this_name,"_",sub_name)
        if(length(sub_data)==0){
          sub_data=NA
        }
        out[complete_name]=sub_data
      }
    }else{
      this_df <- x[[i]]$dimensions # Get the contents
      this_df <- as.list(proc(this_df))  # Take it apart so it's just ONE row and not 23
      for(j in 1:length(this_df)){  # For each column
        this_name2 <-   paste0("custom_",names(this_df[j]))
        this_data2 <-  this_df[[j]]
        if(length(this_data2)==0){
          this_data2=NA
        }
        out[this_name2]<- this_data2 # Add it to our output
      }
    }
  }
  as_tibble(out)  # Turn our output list into a little data frame
}


# And here we run it all - apply that "proc_row" function over the input list
final <- data %>% map_df(proc_row)

print(final)

# NOTE;
 # The input data has fields and subfields - the column names are written as field_subfield to enable this to work
 # If data is missing anything in a field, then it stores NA in the output
 # All this only works if every row of the input has the same total number of fields
 # If a given entry has a different number of fields, it will fail - but that is to be expected, because
 # you can't store things in a table if they have differnet numbers of columns
	library(tidyverse)
	library(purrr)
	library(readxl)
	library(jsonlite)

	# Read your spreadsheet
	input <- read_excel("Book1-2.xlsx")

	# Take your column C and put it into a list, by interpreting the JSON
	data <- map(input$context,fromJSON)

	# As far as I can tell, this 'data' now matches what you have


	# This is a utility function that turns that weird 23x23 data frame
	# into a single row with containing all the data
	# with no NA values
	proc <- function(x){
	frst <- function(x){first(na.omit(x))}
	y <- x %>% summarise_all(lst(frst))
	names(y)<- names(x)
	y
	}

	# This is the main function
	# This takes one list entry and looks at all the fields
	# If it's a "normal" field, it adds it as a column to the output
	# If it is that nested data frame, then it takes it apart using the
	# "proc" function above and then adds all the columns to the output

	proc_row <- function(x){
	out = list() #
	for(i in 1:length(x)){
	this_name = names(x[i])

	if(this_name != "custom"){
	full_contents = x[[i]]
	for(j in 1:length(full_contents)){
	sub_name = names(full_contents[j])
	sub_data = full_contents[[j]]
	complete_name = paste0(this_name,"_",sub_name)
	if(length(sub_data)==0){
	sub_data=NA
	}
	out[complete_name]=sub_data
	}
	}else{
	this_df <- x[[i]]$dimensions # Get the contents
	this_df <- as.list(proc(this_df)) # Take it apart so it's just ONE row and not 23
	for(j in 1:length(this_df)){ # For each column
	this_name2 <- paste0("custom_",names(this_df[j]))
	this_data2 <- this_df[[j]]
	if(length(this_data2)==0){
	this_data2=NA
	}
	out[this_name2]<- this_data2 # Add it to our output
	}
	}
	}
	as_tibble(out) # Turn our output list into a little data frame
	}



	# And here we run it all - apply that "proc_row" function over the input list
	final <- data %>% map_df(proc_row)

	print(final)

	# NOTE;
	# The input data has fields and subfields - the column names are written as field_subfield to enable this to work
	# If data is missing anything in a field, then it stores NA in the output
	# All this only works if every row of the input has the same total number of fields
	# If a given entry has a different number of fields, it will fail - but that is to be expected, because
	# you can't store things in a table if they have differnet numbers of columns