Tidyverse all the way: load it, then read in the data from the gist.
library(tidyverse)
# Raw CSV hosted on GitHub Gist. The long hex segment after `raw/` presumably
# pins a specific revision of the file (TODO confirm).
gist_csv <- "https://gist.github.com/dgkeyes/e37a1aefba31578c29e1418b6f897a68/raw/9e15eec3e4d5f0c138f088510e5808ec68ef8320/all_content.csv"
# Read the CSV straight from the URL; the code below works on the `content`
# column of `x`.
x <- read_csv(gist_csv)
Just getting a feel for the text content...
# Show the first single line of text, across all rows of `x`, that mentions
# a searchie block.
x %>%
  # content: one line per row
  separate_rows(content, sep = "\n") %>%
  # lines with searchie-blocks
  filter(str_detect(content, "searchie-block")) %>%
  # `slice(n = 1)` hands slice() a *named* expression through `...`; older
  # dplyr quietly treated that as slice(1), but dplyr 1.1.0+ rejects named
  # expressions there. slice_head(n = 1) states the intent explicitly.
  slice_head(n = 1) %>%
  # return the line itself as a character vector
  pull(content)
#> [1] "<!-- wp:searchie-block/searchie-media-block {\"content\":\"https://app.searchie.io/file/mpDokVMaD4/embed\",\"searchie_medias\":[{\"value\":\"-1\",\"label\":\"Select\"},{\"value\":\"https://app.searchie.io/file/mpDokVMaD4/embed\",\"label\":\"YAML\"},{\"value\":\"https://app.searchie.io/file/mpDokVZJD4/embed\",\"label\":\"Advanced YAML\"},{\"value\":\"https://app.searchie.io/file/04Nykx872a/embed\",\"label\":\"YAML Solution\"},{\"value\":\"https://app.searchie.io/file/lVNOO7KpNG/embed\",\"label\":\"Advanced YAML Solutions\"}],\"is_responsive\":true,\"iframe_class_name\":\"sio-frontend-iframe-container\",\"searchFiles\":\"yaml\"} -->"
Hmm, interesting. Here's what a full file might look like:
# Print the shortest searchie-block post(s) that still span at least five
# lines, to see what a complete piece of content looks like.
x %>%
  # number of lines = number of newline characters + 1
  mutate(n_lines = str_count(content, "\n") + 1) %>%
  filter(
    str_detect(content, "searchie-block"),
    n_lines >= 5
  ) %>%
  # slice_min() keeps ties by default, so several posts can come back here;
  # cat() then prints them back-to-back, separated by a single space
  slice_min(n_lines, n = 1) %>%
  pull(content) %>%
  cat()
#> <!-- wp:searchie-block/searchie-media-block {"content":"https://app.searchie.io/file/AYDabdknNQ/embed","searchie_medias":[{"value":"-1","label":"Select"},{"value":"https://app.searchie.io/file/AYDabdknNQ/embed","label":"Dealing with small cells"},{"value":"https://app.searchie.io/file/YxDWAra0qg/embed","label":"Dealing with Missing Data"},{"value":"https://app.searchie.io/file/rONAzaoRqZ/embed","label":"Dealing with Missing Data Solutions"},{"value":"https://app.searchie.io/file/P524Yb5OD1/embed","label":"Welcome to Getting Started with R"},{"value":"https://app.searchie.io/file/P5qnaJ6JNr/embed","label":"Welcome to Inferential Stats with R"},{"value":"https://app.searchie.io/file/jVDGQ3PWqW/embed","label":"Getting Started with Data Wrangling and Analysis"}],"is_responsive":true,"iframe_class_name":"sio-frontend-iframe-container","searchFiles":"dealing with small cells"} -->
#> <div class="sio-frontend-iframe-container"><iframe src="https://app.searchie.io/file/AYDabdknNQ/embed" loading="lazy" width="560" height="315" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></div>
#> <!-- /wp:searchie-block/searchie-media-block -->
#>
#> <!-- wp:coblocks/accordion -->
#> <div class="wp-block-coblocks-accordion"><!-- wp:coblocks/accordion-item -->
#> <div class="wp-block-coblocks-accordion-item"></div>
#> <!-- /wp:coblocks/accordion-item --></div>
#> <!-- /wp:coblocks/accordion --> <!-- wp:paragraph -->
#> <p>For this week's assignment, we want you to create your own function. It can be anything that could help you to do your work more efficiently. Maybe something to import data? A plotting function? A summarizing function? It's up to you! Here's an example of what we're asking you to do.</p>
#> <!-- /wp:paragraph -->
#>
#> <!-- wp:searchie-block/searchie-media-block {"content":"https://app.searchie.io/file/W6NMJE7lqZ/embed","searchie_medias":[{"value":"-1","label":"Select"},{"value":"https://app.searchie.io/file/ekqkVpKnN4/embed","label":"Functions"},{"value":"https://app.searchie.io/file/V6NKLr4mqz/embed","label":"Functions Solutions"},{"value":"https://app.searchie.io/file/ZrDevxkBNG/embed","label":"Objects and Functions in R"},{"value":"https://app.searchie.io/file/W6NMJE7lqZ/embed","label":"Functions Assignment"}],"is_responsive":true,"iframe_class_name":"sio-frontend-iframe-container","searchFiles":"functions"} -->
#> <div class="sio-frontend-iframe-container"><iframe src="https://app.searchie.io/file/W6NMJE7lqZ/embed" loading="lazy" width="560" height="315" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></div>
#> <!-- /wp:searchie-block/searchie-media-block -->
#>
#> <!-- wp:ninja-forms/form {"formID":25,"formTitle":"\u0026lt;br\u0026gt;R in 3 Months Project Assignment Submission"} /-->
Okay, let's go to town on this searchie block business.
# Pull every searchie media block out of the post content and return one row
# per block, with the block's JSON attributes and its HTML body as columns.
x %>%
  filter(str_detect(content, "searchie-block")) %>%
  # Step 1: collect the raw blocks of each post (a block spans several lines,
  # from its opening comment through its closing comment).
  # NOTE(review): `(.|\n)` does not match `\r`, so a block written with CRLF
  # line endings would be skipped -- confirm the content is LF-only.
  mutate(
    raw_blocks = str_extract_all(
      content,
      "<!-- wp:searchie-block/searchie-media-block ((.|\n)+?)<!-- /wp:searchie-block/searchie-media-block -->"
    )
  ) %>%
  # Step 2: cut each raw block at the searchie delimiters. The text before the
  # opening delimiter is piece 1 (empty), the JSON is piece 2 and the HTML is
  # piece 3; only pieces 2 and 3 are kept.
  mutate(
    parts = map(raw_blocks, function(post_blocks) {
      map(post_blocks, function(one_block) {
        pieces <- str_split(
          one_block,
          "<!-- /?wp:searchie-block/searchie-media-block | -->"
        )[[1]]
        setNames(pieces[c(2, 3)], c("json", "html"))
      })
    })
  ) %>%
  # Step 3: reshape into a flat tibble -- one row per block, then one column
  # per named piece.
  select(-c(content, raw_blocks)) %>%
  unnest(parts) %>%
  unnest_wider(parts)
#> # A tibble: 281 × 5
#> link title post_type json html
#> <chr> <chr> <chr> <chr> <chr>
#> 1 https://staging-rfortherestofus.kinsta.cloud/sea… Sear… page "{\"… "\n<…
#> 2 https://staging-rfortherestofus.kinsta.cloud/202… You … post "{\"… "\n<…
#> 3 https://staging-rfortherestofus.kinsta.cloud/202… How … post "{\"… "\n<…
#> 4 https://staging-rfortherestofus.kinsta.cloud/202… How … post "{\"… "\n<…
#> 5 https://staging-rfortherestofus.kinsta.cloud/202… How … post "{\"… "\n<…
#> 6 https://staging-rfortherestofus.kinsta.cloud/202… How … post "{\"… "\n<…
#> 7 https://staging-rfortherestofus.kinsta.cloud/202… A Ch… post "{\"… "\n<…
#> 8 https://staging-rfortherestofus.kinsta.cloud/202… A Ch… post "{\"… "\n<…
#> 9 https://staging-rfortherestofus.kinsta.cloud/202… 2021… post "{\"… "\n<…
#> 10 https://staging-rfortherestofus.kinsta.cloud/cou… Welc… sfwd-les… "{\"… "\n<…
#> # … with 271 more rows
for even more fun, you can add a