Skip to content

Instantly share code, notes, and snippets.

@dubsnipe
Created March 15, 2023 17:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dubsnipe/3e4ef2acc4baf654fc58ea01098ff2c7 to your computer and use it in GitHub Desktop.
Save dubsnipe/3e4ef2acc4baf654fc58ea01098ff2c7 to your computer and use it in GitHub Desktop.
require(httr)
require(jsonlite)
require(dplyr)
#' Fetch one batch of main-namespace page titles from Appropedia
#'
#' Calls the MediaWiki `allpages` list API (up to 500 entries per request),
#' starting the enumeration at `apfrom`.
#'
#' @param apfrom Title to start enumerating from; `""` starts at the
#'   beginning. Coerced with `as.character()` and must yield one string.
#' @return A tibble with a single `title` column (zero rows when the API
#'   returns no pages).
query_titles <- function(apfrom = "") {
  apfrom <- as.character(apfrom)
  stopifnot(length(apfrom) == 1L, !is.na(apfrom))

  # Pass parameters via `query` so httr percent-encodes titles containing
  # spaces, "&", "+", "#", etc. instead of corrupting the URL.
  res <- GET(
    "https://www.appropedia.org/w/api.php",
    query = list(
      action = "query",
      list = "allpages",
      aplimit = "500",
      format = "json",
      apfrom = apfrom
    )
  )
  # Fail loudly on HTTP errors rather than trying to parse an error page.
  stop_for_status(res)

  # Decode explicitly as UTF-8 so non-ASCII titles survive on any locale.
  data <- fromJSON(content(res, as = "text", encoding = "UTF-8"))
  pages <- data$query$allpages

  # An empty result has no `ns`/`title` columns to filter or select on.
  if (is.null(pages) || length(pages) == 0L) {
    return(tibble(title = character()))
  }

  as_tibble(pages) %>%
    filter(ns == 0) %>%
    select(title)
}
# Page through the full allpages listing, 500 titles at a time, and save the
# de-duplicated list of titles to CSV.
apfrom <- ""
full_list <- tibble(title = character())
while (TRUE) {
  partial_list <- query_titles(apfrom)
  if (nrow(partial_list) <= 0) {
    break
  }
  full_list <- bind_rows(full_list, partial_list)

  next_apfrom <- as.character(tail(partial_list$title, 1))
  # `apfrom` is inclusive, so the final batch always contains at least the
  # cursor title itself and is never empty. Stop once the cursor no longer
  # advances; otherwise the loop would re-request the last title forever.
  if (identical(next_apfrom, apfrom)) {
    break
  }
  apfrom <- next_apfrom
  print(apfrom)
}
# Consecutive batches overlap by one title (the inclusive cursor); drop dupes.
full_list <- unique(full_list)
write.csv(full_list, file = "appropedia_page_list.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment