lwaldron/openai_analyzecomments.R

## openai_analyzecomments.R
Before using this script you need to create an OpenAI API key (https://platform.openai.com/api-keys)
and put it in ~/.Renviron:

OPENAI_API_KEY='my_key_here'

# libraries used
library(openai)
library(dplyr)
library(stringr)

# prep_request is specific to the particular dataset I was working on. It takes
# a data frame, selects the columns whose names start with "please" or "what"
# (the text to be analyzed), and flattens them into one vector of comments.
# Missing values and entries that consist solely of "NA" or "N/A" are dropped,
# the remaining entries are joined with ". ", and a doubled period (an entry
# that already ended in "." followed by the separator) is collapsed into one.
# `prepend` is the instruction placed in front of the comments; my request to
# ChatGPT here is "Summarize the main sentiments in the following comments,
# organized into themes."
# Returns a single string that can be used as a prompt for the OpenAI chat API.
prep_request <- function(dat,
    prepend = "Summarize the main sentiments in the following comments, organized into themes"){
  comments <- dat |>
    dplyr::select(starts_with(c("please", "what"))) |>
    unlist(use.names = FALSE) |>
    as.character()
  # Drop only entries that are entirely a non-answer. Matching "NA" anywhere
  # in the text would also throw away real comments containing words such as
  # "NASA" or "FINAL". The filter runs before `prepend` is attached so that it
  # can never remove the instruction itself.
  non_answer <- is.na(comments) |
    str_detect(comments, "^\\s*N/?A[[:space:][:punct:]]*$")
  c(prepend, comments[!non_answer]) |>
    paste(collapse = ". ") |>
    str_replace_all(coll(".."), ".")
}

# call_chatgpt sends the request to the OpenAI chat API as the only message of
# a *new* chat. The openai client can also submit follow-up prompts, but each
# request is deliberately sent on its own because I wanted the same conditions
# for every request. The stop argument is a string that the API uses to know
# when to stop - I'm not sure why it's necessary, but I got errors when leaving
# it as the default NULL.
call_chatgpt <- function(request, model = "gpt-3.5-turbo-0125"){
  user_message <- list(role = "user", content = request)
  openai::create_chat_completion(
    model = model,
    messages = list(user_message),
    stop = "????"
  )
}

# chat_completion wraps prep_request and call_chatgpt: it takes a data frame,
# builds the prompt from it, sends the prompt to the given model, and returns
# the content of the reply with a line break inserted after every colon.
chat_completion <- function(dat, model = "gpt-3.5-turbo-0125"){
  prompt <- prep_request(dat)
  reply <- call_chatgpt(request = prompt, model = model)
  gsub(":", ":\n ", reply$choices$message.content, fixed = TRUE)
}

# Usage - in Rmarkdown / quarto I set `cache = TRUE` to minimize API calls,
# which are not free.

output <- chat_completion(
  dplyr::select(
    dat,
    starts_with("please_share_your_thoughts_on_class_size")
  ),
  model = "gpt-3.5-turbo-0125" # default model
)

# In R Markdown I had to use this trick of "inline output" to get the output to
# be formatted instead of just a string all on one line.
`r output`
	Before using this script you need to create an OpenAI API key (https://platform.openai.com/api-keys)
	and put it in ~/.Renviron:

	OPENAI_API_KEY='my_key_here'

	# libraries used
	library(openai)
	library(dplyr)
	library(stringr)

	# prep_request is specific to the particular dataset I was working on. It takes
	# a data frame, selects the columns whose names start with "please" or "what"
	# (the text to be analyzed), and flattens them into one vector of comments.
	# Missing values and entries that consist solely of "NA" or "N/A" are dropped,
	# the remaining entries are joined with ". ", and a doubled period (an entry
	# that already ended in "." followed by the separator) is collapsed into one.
	# `prepend` is the instruction placed in front of the comments; my request to
	# ChatGPT here is "Summarize the main sentiments in the following comments,
	# organized into themes."
	# Returns a single string that can be used as a prompt for the OpenAI chat API.
	prep_request <- function(dat,
	    prepend = "Summarize the main sentiments in the following comments, organized into themes"){
	  comments <- dat |>
	    dplyr::select(starts_with(c("please", "what"))) |>
	    unlist(use.names = FALSE) |>
	    as.character()
	  # Drop only entries that are entirely a non-answer. Matching "NA" anywhere
	  # in the text would also throw away real comments containing words such as
	  # "NASA" or "FINAL". The filter runs before `prepend` is attached so that it
	  # can never remove the instruction itself.
	  non_answer <- is.na(comments) |
	    str_detect(comments, "^\\s*N/?A[[:space:][:punct:]]*$")
	  c(prepend, comments[!non_answer]) |>
	    paste(collapse = ". ") |>
	    str_replace_all(coll(".."), ".")
	}

	# call_chatgpt sends the request to the OpenAI chat API as the only message of
	# a new chat. The openai client can also submit follow-up prompts, but each
	# request is deliberately sent on its own because I wanted the same conditions
	# for every request. The stop argument is a string that the API uses to know
	# when to stop - I'm not sure why it's necessary, but I got errors when leaving
	# it as the default NULL.
	call_chatgpt <- function(request, model = "gpt-3.5-turbo-0125"){
	  user_message <- list(role = "user", content = request)
	  openai::create_chat_completion(
	    model = model,
	    messages = list(user_message),
	    stop = "????"
	  )
	}

	# chat_completion wraps prep_request and call_chatgpt: it takes a data frame,
	# builds the prompt from it, sends the prompt to the given model, and returns
	# the content of the reply with a line break inserted after every colon.
	chat_completion <- function(dat, model = "gpt-3.5-turbo-0125"){
	  prompt <- prep_request(dat)
	  reply <- call_chatgpt(request = prompt, model = model)
	  gsub(":", ":\n ", reply$choices$message.content, fixed = TRUE)
	}

	# Usage - in Rmarkdown / quarto I set `cache = TRUE` to minimize API calls,
	# which are not free.

	# Summarize the class-size comments. Written as a direct call rather than a
	# pipe chain: the escaped pipe (`\|>`) left behind by the markup is not valid R.
	output <- chat_completion(
	  dplyr::select(
	    dat,
	    starts_with("please_share_your_thoughts_on_class_size")
	  ),
	  model = "gpt-3.5-turbo-0125" # default model
	)

	# In R Markdown I had to use this trick of "inline output" to get the output to
	# be formatted instead of just a string all on one line.
	`r output`