Skip to content

Instantly share code, notes, and snippets.

@jfaganUK
Created March 1, 2017 20:09
Show Gist options
  • Save jfaganUK/b6e1d647194126ff26dffbdace273723 to your computer and use it in GitHub Desktop.
Save jfaganUK/b6e1d647194126ff26dffbdace273723 to your computer and use it in GitHub Desktop.
Functions for connecting to and running analyses on uClassify
### Functions for connecting to uClassify and running text classification
#' Build the empty results table for a set of texts
#'
#' @param text_vec The vector of text that will be classified
#' @return A data.table with column `txt` holding `text_vec` and a numeric
#'   column `text_coverage` of the same length, initialised to zero
uclassify_output_dt <- function(text_vec) {
  # numeric() yields a zero-filled placeholder, one entry per text
  coverage <- numeric(length(text_vec))
  data.table(txt = text_vec, text_coverage = coverage)
}
#' Add one column per classifier class to the output data table
#'
#' Requests the classifier description from `url` and, for every `className`
#' found in the parsed JSON response, adds an `NA`-filled numeric column of
#' that name to `o`.
#'
#' @param o The output dataset
#' @param url The URL of the uClassifier
#' @param token The API token
#' @return `o` with one additional `NA`-filled numeric column per class name.
#'   An HTTP error status raises an error instead of returning.
uclassify_get_classifier_struct <- function(o, url = getOption('uclassifyURL'), token = getOption('uclassifyToken')) {
  req <- httr::GET(
    url,
    httr::add_headers(Authorization = paste0("Token ", token), 'Content-Type' = 'application/json')
  )
  # Fail loudly on an HTTP error rather than trying to parse an error payload.
  httr::stop_for_status(req)
  classifier_structure <- jsonlite::fromJSON(rawToChar(req$content))

  # Size the placeholder columns from the table itself; iterating over the
  # names directly is a no-op when the response lists no classes.
  n_rows <- nrow(o)
  for (class_name in classifier_structure$className) {
    o[[class_name]] <- rep(NA_real_, n_rows)
  }
  o
}
#' Run the vector of text against the uClassify classifier defined using the URL.
#'
#' Posts the texts as a JSON body to `<url>/classify`, then combines the
#' returned text coverage and the reshaped classification results with the
#' input texts.
#'
#' @param text_vec The vector of text
#' @param url The URL of the classifier
#' @param token The API token
#' @return A data table holding `txt`, `text_coverage` and the columns produced
#'   by `uclassify_reshape_result()`. An HTTP error status raises an error
#'   instead of returning.
uclassify_get_classify_results <- function(text_vec, url = getOption('uclassifyURL'), token = getOption('uclassifyToken')) {
  o <- uclassify_output_dt(text_vec)
  json_body <- jsonlite::toJSON(list(texts = text_vec))

  # request results from the classifier
  req <- httr::POST(
    paste0(url, '/classify'),
    body = json_body, encode = 'json',
    httr::add_headers(Authorization = paste0("Token ", token), 'Content-Type' = 'application/json')
  )
  # Without this check an error payload would parse into a list lacking
  # `textCoverage`, and the assignment below would silently drop the column.
  httr::stop_for_status(req)

  result_list <- jsonlite::fromJSON(
    rawToChar(req$content),
    simplifyVector = FALSE, simplifyDataFrame = TRUE, flatten = TRUE
  )

  # reshape the results
  o$text_coverage <- result_list$textCoverage
  cbind(o, uclassify_reshape_result(result_list))
}
#' Reshape the parsed classification results into one wide table
#'
#' Each element of `result_list$classification` is reshaped from long form
#' (one row per `className`) to a single wide row whose columns are named
#' after the classes; the rows are then stacked with `rbindlist()`.
#'
#' @param result_list A result list converted from JSON; its `classification`
#'   element is iterated over, each entry being passed to `reshape()` with
#'   `className` as the time variable
#' @return A data.table of the combined, reshaped results
uclassify_reshape_result <- function(result_list) {
  to_wide_row <- function(x) {
    # A constant id makes reshape() collapse every class into a single row.
    x$id <- 0
    wide <- reshape(x, direction = 'wide', timevar = 'className', idvar = 'id')
    wide$id <- NULL
    # reshape() prefixes each column with "p."; strip only that leading prefix
    # so class names that themselves contain "p." are left intact.
    colnames(wide) <- sub('^p\\.', '', colnames(wide))
    wide
  }
  rbindlist(lapply(result_list$classification, to_wide_row))
}
# Example script: classify a few texts with uClassify.
# The workspace is deliberately not cleared here: rm(list = ls()) would also
# remove any functions already defined in the current session.
library(jsonlite)
library(httr)
library(data.table)
source('uclassify_functions.R')
# this is just an example url for a recent project
options(uclassifyURL = 'https://api.uclassify.com/v1/prfekt/myers-briggs-attitude',
        uclassifyToken = 'yourAPIKey')
# just some text
text_vec <- c('Great meeting with CEOs of leading U.S. health insurance companies who provide great healthcare to the American people.',
              'Russia talk is FAKE NEWS put out by the Dems, and played up by the media, in order to mask the big election defeat and the illegal leaks!',
              'The race for DNC Chairman was, of course, totally "rigged." Bernie\'s guy, like Bernie himself, never had a chance. Clinton demanded Perez!')
o <- uclassify_get_classify_results(text_vec)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment