Last active
January 6, 2025 21:10
-
-
Save stephenturner/8b15b71548177be45911110acd73998c to your computer and use it in GitHub Desktop.
Code for summarizing #Rstats posts on bluesky with atrrr+ellmer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get top Bluesky posts for #Rstats ---------------------------------------
library(dplyr)
library(glue)
library(atrrr)
library(ellmer)

# Log in to Bluesky. The handle and password are read from the BSKY_USER and
# BSKY_PASS environment variables (set them in e.g. ~/.Rprofile). To log in
# interactively instead, call `auth()` with no arguments.
auth(
  user = Sys.getenv("BSKY_USER"),
  password = Sys.getenv("BSKY_PASS"),
  overwrite = TRUE
)
# Get a bunch of recent #Rstats posts
d0 <- atrrr::search_skeet("#rstats", limit = 1000)

# Reporting window: the seven days ending yesterday
start_date <- Sys.Date() - 7L
end_date <- Sys.Date() - 1L

# Keep at most `top` posts by like count, and of those only the ones with
# strictly more than `likes_threshold` likes
top <- 100L
likes_threshold <- 5L

d <- d0 |>
  mutate(date = lubridate::date(indexed_at)) |>
  filter(date >= start_date, date <= end_date) |>
  transmute(
    # Turn the at:// record URI into a bsky.app web link. Both patterns are
    # literal text, so match them with fixed = TRUE rather than as regexes
    # (the dots in "app.bsky.feed.post" would otherwise match any character).
    url = uri |>
      gsub("at://", "https://bsky.app/profile/", x = _, fixed = TRUE) |>
      gsub("app.bsky.feed.post", "post", x = _, fixed = TRUE),
    # Fall back to the handle when the display name is empty or missing
    author = ifelse(
      is.na(author_name) | author_name == "",
      author_handle,
      author_name
    ),
    # Collapse runs of newlines so each post is a single line of text
    text = gsub("\n+", " ", text),
    likes = like_count
  ) |>
  arrange(desc(likes)) |>
  head(top) |>
  filter(likes > likes_threshold)

# Name the snapshot after the window actually queried, so a run in a later
# week neither mislabels its data nor overwrites an earlier snapshot
saveRDS(
  d,
  paste0("rstats-bluesky-posts-", start_date, "-to-", end_date, ".rds")
)
# Flatten the posts into text:
# - post_text: author, URL and text of every post (input for the LLM)
# - post_bullets: markdown bullets for the first 10 posts (used in the gist)
post_text <- paste(
  glue_data(d, "{author}\n{url}\n{text}\n\n"),
  collapse = "\n"
)
post_bullets <- paste(
  head(glue_data(d, "- [{author}]({url}): {text}\n"), 10),
  collapse = "\n"
)
# System prompt for the summary. It is written one sentence per element and
# joined with single spaces, so the model receives one line with no newlines.
prompt <- paste(
  c(
    "What are the top posts on #rstats Bluesky last week?",
    "I will provide you with data showing the author, post URL, and post text from the top 100 most liked posts tagged with #Rstats.",
    "Remember, this is bluesky, and posts are called posts, NOT tweets. Make sure you call them posts, not tweets.",
    "I want you to return a summary of the top posts, in markdown.",
    "Who posted about what? And why is it important? Give examples.",
    "Give me a list of topics, a list of common hashtags, and author names.",
    "Total summary should be about 500-1000 words.",
    "When you mention specific posts, link to them using the URL in markdown format.",
    "Also talk about the general overall tone or sentiment.",
    "You can link to a specific post by using the URL provided, where the link text is the author's name, like '[author](url) posted <text>'.",
    "Do not include any preample or postscript text or followup question suggestions.",
    "Here are the posts."
  ),
  collapse = " "
)
# Set up the chat client -------------------------------------------------
# Exactly one client is created. `model` is reused further down to label the
# uploaded gists, so it must name the model that actually produced the text.
if (nzchar(Sys.getenv("ANTHROPIC_API_KEY"))) {
  # Use claude 3.5 sonnet if you have a claude API key in ANTHROPIC_API_KEY
  model <- "claude-3-5-sonnet-20241022"
  chat <- chat_claude(system_prompt = prompt, model = model)
} else {
  # Otherwise use an open-source model served by ollama. Other options:
  # "gemma2", "mistral", etc.
  model <- "llama3.3"
  chat <- chat_ollama(system_prompt = prompt, model = model)
}
# Try three responses, then choose the best.
# A Chat object keeps its conversation history, so calling chat$chat()
# repeatedly on the same object would send attempts 2 and 3 as follow-up
# turns that carry all earlier posts and answers. Each attempt therefore
# runs on its own clone of `chat`, so every one starts from just the
# system prompt.
n_tries <- 3L
res <- vector("list", n_tries)
for (i in seq_len(n_tries)) {
  res[[i]] <- chat$clone()$chat(post_text)
}

# Pick the best: change this
best <- 1

# Print it out
message(res[[best]])
# Upload result to GitHub Gist --------------------------------------------
library(gistr)
gist_auth()

# Markdown document: the chosen LLM summary, then the top-10 bullet list.
# Sections are separated by blank lines.
gist_md <- paste(
  c(
    "## Summary",
    glue("_Produced with {model}_"),
    res[[best]],
    "## Top posts",
    "_Top 10 posts_:",
    post_bullets
  ),
  collapse = "\n\n"
)

# Publish as a public gist. The file name and description both record the
# date window and the model.
gist_create(
  code = gist_md,
  filename = paste0(
    paste(start_date, end_date, "bsky-summary", model, sep = "-"),
    ".md"
  ),
  description = paste0(
    "Summary of #Rstats posts on bluesky from ", start_date,
    " to ", end_date, " using ", model
  ),
  public = TRUE
)
# Sentiment analysis on posts ---------------------------------------------
library(mall)

# Model used by mall for the per-post steps below. It is kept in its own
# variable because it differs from `model` (the summarisation model), and the
# gist at the end must credit the model that actually made the translations.
mall_model <- "llama3.2"
llm_use("ollama", mall_model, seed = 100, temperature = 0)

# Add .sentiment, .summary (at most 5 words) and .translation (Spanish)
# columns for each post
d <- d |>
  llm_sentiment(text) |>
  llm_summarize(text, max_words = 5) |>
  llm_translate(text, "spanish")

# Bar chart of the number of posts per sentiment label
library(ggplot2)
d |>
  count(.sentiment) |>
  ggplot(aes(.sentiment, n)) +
  geom_col() +
  theme_classic() +
  labs(
    x = "Sentiment",
    y = "Count",
    title = "Sentiment of #Rstats posts on Bluesky"
  ) +
  coord_flip()

# Markdown bullets for the first 10 posts: author link, short summary, and
# the translated text
smry_md <-
  glue("- [{d$author}]({d$url}) ({d$.summary}): {d$.translation}") |>
  head(10) |>
  paste(collapse = "\n")

gist_create(
  filename = paste0(
    start_date, "-", end_date,
    "-bsky-post-summary-translation-", mall_model, ".md"
  ),
  description = paste0(
    "Individual #Rstats post translations: ", start_date,
    " to ", end_date, " using ", mall_model
  ),
  code = smry_md,
  public = TRUE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment