# Last active February 14, 2024 13:45
# Simple use of the openai package for sentiment analysis of written teaching
# evaluation comments.
# Before using this script you need to create an OpenAI API key
# (https://platform.openai.com/api-keys) and put it in ~/.Renviron:
# OPENAI_API_KEY=<your-api-key>
# libraries used
# prep_request builds the prompt string that is sent to the OpenAI chat API.
# It is specific to the particular dataset I was working on: the free-text
# answers live in columns whose names start with "please" or "what".
#
# Arguments:
#   dat     - data frame of survey responses.
#   prepend - instruction placed in front of the comments. By default:
#             "Summarize the main sentiments in the following comments,
#             organized into themes".
#
# Returns a single string: `prepend` followed by every usable comment, joined
# with ". ", with any resulting double period collapsed to a single one.
# Entries that are missing, blank, or nothing but a placeholder (NA, N/A, in
# any letter case, optionally followed by a period) are left out.
prep_request <- function(dat,
                         prepend = "Summarize the main sentiments in the following comments, organized into themes") {
  comments <- dat |>
    dplyr::select(dplyr::starts_with(c("please", "what"))) |>
    unlist(use.names = FALSE) |>
    as.character()

  # Only drop entries that consist *entirely* of a placeholder. Filtering on
  # the bare substring "NA" would also throw away real comments such as
  # "I loved the NASA unit".
  is_placeholder <- is.na(comments) |
    grepl("^\\s*(n/?a\\.?)?\\s*$", comments, ignore.case = TRUE)

  request <- c(prepend, comments[!is_placeholder]) |>
    paste(collapse = ". ") |>
    # A comment that already ends in "." would otherwise be followed by "..".
    stringr::str_replace_all(stringr::coll(".."), ".")

  request
}
# call_chatgpt takes the request and sends it to the OpenAI chat API in a *new*
# chat. It is possible with the openai client to submit follow-up prompts, but
# I'm not doing that because I wanted the same conditions for each request.
#
# Arguments:
#   request - the prompt, a single non-empty string.
#   model   - name of the OpenAI chat model to use.
#
# Returns whatever openai::create_chat_completion() returns.
#
# The stop argument is a string that the API uses to know when to stop -
# I'm not sure why it's necessary, but I got errors when leaving it as the
# default NULL.
call_chatgpt <- function(request, model = "gpt-3.5-turbo-0125") {
  # API calls are not free, so refuse obviously unusable prompts up front
  # instead of paying for (or failing on) a malformed request.
  if (!is.character(request) || length(request) != 1L ||
      is.na(request) || !nzchar(trimws(request))) {
    stop("`request` must be a single non-empty string.", call. = FALSE)
  }

  openai::create_chat_completion(
    model = model,
    messages = list(list(role = "user", content = request)),
    stop = "????"
  )
}
# chat_completion wraps both of these functions, taking a data frame and
# returning the chat completion.
#
# Arguments:
#   dat   - data frame passed on to prep_request().
#   model - model name passed on to call_chatgpt().
#
# Returns the message content of the response with a line break inserted
# after every colon.
chat_completion <- function(dat, model = "gpt-3.5-turbo-0125") {
  request <- prep_request(dat)
  res <- call_chatgpt(request = request, model = model)
  # NOTE(review): assumes the response exposes the reply text as
  # res$choices$message.content - confirm against the installed openai version.
  gsub(":", ":\n ", res$choices$message.content)
}
# Usage - API calls are not free, so in Rmarkdown / quarto I set `cache = TRUE`
# on the chunk to avoid repeating them.
# Only the class-size question is passed on for summarizing here.
output <- chat_completion(
  dplyr::select(dat, starts_with("please_share_your_thoughts_on_class_size")),
  model = "gpt-3.5-turbo-0125" # default model
)
# In R Markdown I had to use this trick of "inline output" to get the output to
# be formatted instead of just a string all on one line.
# `r output`
