Skip to content

Instantly share code, notes, and snippets.

@dhicks
Created March 8, 2017 15:28
Show Gist options
  • Save dhicks/9688882e5cbeee99d032cc1637c352e1 to your computer and use it in GitHub Desktop.
Save dhicks/9688882e5cbeee99d032cc1637c352e1 to your computer and use it in GitHub Desktop.
google_books_count pairs
library(foreach)
library(doSNOW)
library(dplyr)
library(httr)
#' Log of the Google Books hit count for a set of search terms
#'
#' Sends one request to the Google Books `volumes` endpoint with all
#' `search_terms` joined by `+` in the `q` parameter, and returns the natural
#' log of the `totalItems` field of the parsed response.
#'
#' @param search_terms Non-empty vector of search terms (coerced to
#'   character). Each term is percent-encoded before being placed in the URL.
#' @param key API key appended to the request. Defaults to the variable
#'   `api_key`, looked up in the function's enclosing environment, so existing
#'   one-argument calls keep working as long as `api_key` is defined there.
#' @return A numeric scalar, `log(totalItems)`; `-Inf` when the API reports
#'   zero items.
#' @details Stops with an error if `search_terms` is empty or contains `NA`,
#'   if the HTTP request returns an error status, or if the response has no
#'   `totalItems` field.
google_books_count <- function(search_terms, key = api_key) {
  if (length(search_terms) == 0L || anyNA(search_terms)) {
    stop("search_terms must be a non-empty vector with no missing values",
         call. = FALSE)
  }

  # Percent-encode every term so spaces and reserved characters (&, #, +, ...)
  # cannot corrupt the query string. Terms that already look percent-encoded
  # are passed through unchanged (URLencode's `repeated = FALSE` default).
  encoded_terms <- vapply(as.character(search_terms), utils::URLencode,
                          character(1), reserved = TRUE, USE.NAMES = FALSE)
  terms <- paste(encoded_terms, collapse = '+')

  # The Books API parameter is `projection` (not `project`); only the total
  # count is read, so a single lightweight result is requested.
  query_url <- paste0('https://www.googleapis.com/books/v1/volumes',
                      '?',
                      'q=', terms,
                      '&projection=lite&langRestrict=en&maxResults=1',
                      '&key=', key)

  response <- httr::GET(query_url)
  # Fail with the HTTP status (e.g. quota exceeded) instead of an opaque
  # error from log(NULL) further down.
  httr::stop_for_status(response)

  response_list <- httr::content(response, as = 'parsed')
  total_items <- response_list$totalItems
  if (is.null(total_items)) {
    stop("Google Books response did not contain 'totalItems'", call. = FALSE)
  }

  log(total_items)
}
## Load the input data. The file is expected to define `pairs`, a data frame
## with one term pair per row in columns `Var1` and `Var2` (both are read
## below).
load('pairs-nodist.Rdata')

## Register parallel cluster: one worker per detected core
cl <- makeCluster(parallel::detectCores())
# registerDoParallel(cl)
registerDoSNOW(cl)

## Define progress bar, advanced by doSNOW as each task completes
pb <- txtProgressBar(max = nrow(pairs), style = 3)
progress <- function(n) setTxtProgressBar(pb, n)

## Query the hit count for every pair in parallel and time the whole run.
## The `finally` clause closes the progress bar and shuts the workers down
## even when a request fails, so no worker processes are leaked.
tryCatch(
  system.time(
    pairs <- foreach(pair = iter(pairs, by = 'row'),
                     .combine = bind_rows,
                     # bind_rows accepts many inputs at once; avoids
                     # re-binding the growing result pairwise
                     .multicombine = TRUE,
                     .packages = c('dplyr', 'stringr', 'xml2', 'httr'),
                     # `api_key` is only referenced inside
                     # google_books_count(), so foreach's automatic export
                     # does not pick it up; ship it to the workers explicitly
                     .export = 'api_key',
                     .options.snow = list(progress = progress)
    ) %dopar% {
      x <- pair$Var1
      y <- pair$Var2
      f_xy <- google_books_count(c(x, y))
      # Sys.sleep(.5)
      tibble(Var1 = x, Var2 = y, f_xy)
    }
  ),
  finally = {
    close(pb)
    stopCluster(cl)
  }
)

## Save the pairs together with their log hit counts (column `f_xy`)
save(pairs, file = 'pairs-dist.Rdata')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment