Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Last active January 6, 2025 21:10
Show Gist options
  • Save stephenturner/8b15b71548177be45911110acd73998c to your computer and use it in GitHub Desktop.
Save stephenturner/8b15b71548177be45911110acd73998c to your computer and use it in GitHub Desktop.
Code for summarizing #Rstats posts on bluesky with atrrr+ellmer
# Get top Bluesky posts for #Rstats ---------------------------------------
library(dplyr)
library(glue)
library(atrrr)
library(ellmer)
# Log in to Bluesky with the credentials stored in the BSKY_USER and
# BSKY_PASS environment variables (e.g. set from ~/.Rprofile).
# For an interactive login, call `auth()` with no arguments instead.
auth(
  user = Sys.getenv("BSKY_USER"),
  password = Sys.getenv("BSKY_PASS"),
  overwrite = TRUE
)
# Fetch recent #rstats posts and keep last week's most-liked ones ---------
# Pull a large batch of recent posts tagged #rstats.
d0 <- atrrr::search_skeet("#rstats", limit = 1000)

# Reporting window: the seven days ending yesterday (both ends inclusive).
start_date <- Sys.Date() - 7L
end_date <- Sys.Date() - 1L
# Keep at most `top` posts, and of those only the ones with strictly more
# than `likes_threshold` likes.
top <- 100L
likes_threshold <- 5L

d <-
  d0 |>
  mutate(date = lubridate::date(indexed_at)) |>
  filter(date >= start_date & date <= end_date) |>
  transmute(
    url = uri,
    # Fall back to the handle when the display name is empty or missing.
    author = ifelse(
      is.na(author_name) | author_name == "",
      author_handle,
      author_name
    ),
    text,
    likes = like_count
  ) |>
  mutate(
    # Turn the at:// record URI into a browsable bsky.app link. The scheme is
    # anchored and the collection name is matched literally, so the dots in
    # "app.bsky.feed.post" are not treated as regex wildcards.
    url = sub("^at://", "https://bsky.app/profile/", url),
    url = sub("/app.bsky.feed.post/", "/post/", url, fixed = TRUE),
    # Collapse line breaks so every post is a single line of text.
    text = gsub("\n+", " ", text)
  ) |>
  arrange(desc(likes)) |>
  head(top) |>
  filter(likes > likes_threshold)

# Name the snapshot after the window it covers, so each run writes its own
# file instead of reusing one fixed, week-specific filename.
saveRDS(d, paste0("rstats-bluesky-posts-", start_date, "-", end_date, ".rds"))
# Flatten the posts into the two text blobs used further down:
# - post_text: author, URL and text of every post, sent to the model
# - post_bullets: markdown bullet list of the first 10 posts, for the gist
post_text <- paste(
  glue("{d$author}\n{d$url}\n{d$text}\n\n"),
  collapse = "\n"
)
post_bullets <- paste(
  head(glue("- [{d$author}]({d$url}): {d$text}\n"), 10),
  collapse = "\n"
)
# System prompt for the summarising model. Each instruction is one element;
# joining them with single spaces yields a one-line prompt with no newlines.
# NOTE(review): "preample" below looks like a typo for "preamble"; it is left
# untouched so the prompt text sent to the model does not change.
prompt <- paste(
  c(
    "What are the top posts on #rstats Bluesky last week?",
    "I will provide you with data showing the author, post URL, and post text from the top 100 most liked posts tagged with #Rstats.",
    "Remember, this is bluesky, and posts are called posts, NOT tweets. Make sure you call them posts, not tweets.",
    "I want you to return a summary of the top posts, in markdown.",
    "Who posted about what? And why is it important? Give examples.",
    "Give me a list of topics, a list of common hashtags, and author names.",
    "Total summary should be about 500-1000 words.",
    "When you mention specific posts, link to them using the URL in markdown format.",
    "Also talk about the general overall tone or sentiment.",
    "You can link to a specific post by using the URL provided, where the link text is the author's name, like '[author](url) posted <text>'.",
    "Do not include any preample or postscript text or followup question suggestions.",
    "Here are the posts."
  ),
  collapse = " "
)
# Set up the chat backend --------------------------------------------------
# Exactly one chat object is created, so the script can be run top to bottom
# without needing every backend to be available:
#   - Claude 3.5 Sonnet when a key is present in ANTHROPIC_API_KEY
#   - otherwise a local open-source model served by Ollama
# Change `ollama_model` to try another local model, e.g. "gemma2" or
# "mistral".
# NOTE: with ANTHROPIC_API_KEY set, the requests below go to the Claude API;
# unset the variable to force the local model.
ollama_model <- "llama3.3"

if (nzchar(Sys.getenv("ANTHROPIC_API_KEY"))) {
  model <- "claude-3-5-sonnet-20241022"
  chat <- chat_claude(system_prompt = prompt, model = model)
} else {
  model <- ollama_model
  chat <- chat_ollama(system_prompt = prompt, model = model)
}
# Try three responses, then choose the best
# A chat object keeps its conversation history, so calling `chat$chat()`
# repeatedly on the same object would feed the earlier answers (and the full
# post text again) into each later attempt. Cloning the untouched `chat` for
# every attempt keeps the candidates independent of one another.
n_attempts <- 3L
res <- lapply(seq_len(n_attempts), \(i) chat$clone()$chat(post_text))

# Pick the best: change this to the index of the preferred response
best <- 1L

# Print it out
message(res[[best]])
# Upload result to GitHub Gist --------------------------------------------
library(gistr)
gist_auth()

# Markdown body: the chosen model summary first, then the bullet list of
# top posts, with blank lines between the sections.
gist_md <- paste(
  "## Summary",
  glue("_Produced with {model}_"),
  res[[best]],
  "## Top posts",
  glue("_Top 10 posts_:"),
  post_bullets,
  sep = "\n\n"
)

# File name and description both record the date window and the model used.
summary_file <- paste0(
  start_date, "-", end_date, "-bsky-summary-", model, ".md"
)
summary_desc <- paste0(
  "Summary of #Rstats posts on bluesky from ", start_date,
  " to ", end_date, " using ", model
)

gist_create(
  filename = summary_file,
  description = summary_desc,
  code = gist_md,
  public = TRUE
)
# Sentiment analysis on posts ---------------------------------------------
library(mall)
library(ggplot2)

# Model used by mall for the per-post calls below. It is kept in its own
# variable so the gist metadata names the model that actually produced the
# output, rather than `model`, which was used for the overall summary.
mall_model <- "llama3.2"
llm_use("ollama", mall_model, seed = 100, temperature = 0)

# Run sentiment, a <= 5-word summary and a Spanish translation on each post's
# text; the results are read back below as the .sentiment, .summary and
# .translation columns.
d <-
  d |>
  llm_sentiment(text) |>
  llm_summarize(text, max_words = 5) |>
  llm_translate(text, "spanish")

# Bar chart of how many posts fall into each sentiment class.
sentiment_plot <-
  d |>
  count(.sentiment) |>
  ggplot(aes(.sentiment, n)) +
  geom_col() +
  theme_classic() +
  labs(
    x = "Sentiment",
    y = "Count",
    title = "Sentiment of #Rstats posts on Bluesky"
  ) +
  coord_flip()
# Print explicitly so the plot is also drawn when the script is source()d,
# where top-level values are not auto-printed.
print(sentiment_plot)

# Markdown bullets for the first 10 posts: author link, short summary, and
# the translated text.
smry_md <-
  glue("- [{d$author}]({d$url}) ({d$.summary}): {d$.translation}") |>
  head(10) |>
  paste(collapse = "\n")

gist_create(
  filename = paste0(
    start_date, "-", end_date,
    "-bsky-post-summary-translation-", mall_model, ".md"
  ),
  description = paste0(
    "Individual #Rstats post translations: ", start_date,
    " to ", end_date, " using ", mall_model
  ),
  code = smry_md,
  public = TRUE
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment