Skip to content

Instantly share code, notes, and snippets.

Last active January 6, 2025 21:10
Show Gist options
  • Save stephenturner/8b15b71548177be45911110acd73998c to your computer and use it in GitHub Desktop.
Save stephenturner/8b15b71548177be45911110acd73998c to your computer and use it in GitHub Desktop.
Code for summarizing #Rstats posts on bluesky with atrrr+ellmer
# Get top Bluesky posts for #Rstats ---------------------------------------
# Authenticate to Bluesky with the credentials stored in the BSKY_USER and
# BSKY_PASS environment variables (e.g. set from ~/.Rprofile).
# Alternatively, authenticate interactively with just `auth()`.
atrrr::auth(
  user = Sys.getenv("BSKY_USER"),
  password = Sys.getenv("BSKY_PASS")
)
# Get a bunch of recent #Rstats posts
d0 <- atrrr::search_skeet("#rstats", limit = 1000)

# Reporting window: from seven days ago through yesterday (both inclusive)
start_date <- Sys.Date() - 7L
end_date <- Sys.Date() - 1L
# Keep at most `top` posts, and only those with more than `likes_threshold`
# likes
top <- 100L
likes_threshold <- 5L

d <-
  d0 |>
  dplyr::mutate(date = lubridate::date(indexed_at)) |>
  dplyr::filter(date >= start_date & date <= end_date) |>
  dplyr::transmute(
    url = uri,
    # Fall back to the handle when the display name is empty or missing
    author = ifelse(
      is.na(author_name) | author_name == "",
      author_handle,
      author_name
    ),
    text,
    likes = like_count
  ) |>
  # Turn the AT URI (at://<did>/app.bsky.feed.post/<rkey>) into a web link
  # (https://bsky.app/profile/<did>/post/<rkey>)
  dplyr::mutate(
    url = url |>
      gsub("at://", "https://bsky.app/profile/", x = _, fixed = TRUE) |>
      gsub("app.bsky.feed.post", "post", x = _, fixed = TRUE)
  ) |>
  dplyr::arrange(dplyr::desc(likes)) |>
  head(top) |>
  dplyr::filter(likes > likes_threshold) |>
  # Collapse runs of newlines so every post's text sits on a single line
  dplyr::mutate(text = gsub("\n+", " ", x = text))

# NOTE(review): the file name is fixed and does not follow start_date/end_date.
saveRDS(d, "rstats-bluesky-posts-dec-18-24-2024.rds")
# Build two text payloads from the filtered posts:
#   post_text    - one author / URL / text stanza per post, for all posts
#   post_bullets - markdown bullet links for the first 10 posts, kept for the
#                  summary gist
post_text <- paste(
  glue("{d$author}\n{d$url}\n{d$text}\n\n"),
  collapse = "\n"
)
post_bullets <- paste(
  head(glue("- [{d$author}]({d$url}): {d$text}\n"), 10),
  collapse = "\n"
)
# System prompt describing the summary we want back from the model.
# It is written across several lines for readability and flattened to a
# single line below.
prompt <- "What are the top posts on #rstats Bluesky last week?
I will provide you with data showing the author, post URL, and post text from the top 100 most liked posts tagged with #Rstats.
Remember, this is bluesky, and posts are called posts, NOT tweets. Make sure you call them posts, not tweets.
I want you to return a summary of the top posts, in markdown.
Who posted about what? And why is it important? Give examples.
Give me a list of topics, a list of common hashtags, and author names.
Total summary should be about 500-1000 words.
When you mention specific posts, link to them using the URL in markdown format.
Also talk about the general overall tone or sentiment.
You can link to a specific post by using the URL provided, where the link text is the author's name, like '[author](url) posted <text>'.
Do not include any preamble or postscript text or followup question suggestions.
Here are the posts."
# Replace every newline with a space so the prompt is one line of text
prompt <- gsub("\n", " ", prompt, fixed = TRUE)
# Set up the chat client, using `prompt` as the system prompt.
# Every assignment below overwrites `chat`, so when the script is run top to
# bottom only the last one is kept.
# NOTE(review): `model` is not assigned anywhere in the visible script; it
# must be set before these calls - TODO confirm where it is defined.
# Use claude 3.5 sonnet if you have a claude API key in ANTHROPIC_API_KEY
chat <- chat_claude(system_prompt = prompt, model=model)
# Otherwise use an open-source model like llama3.3, gemma2, mistral, etc.
# NOTE(review): the three calls below are identical; presumably each was meant
# to follow a different `model` assignment - verify, then drop the duplicates.
chat <- chat_ollama(system_prompt = prompt, model=model)
chat <- chat_ollama(system_prompt = prompt, model=model)
chat <- chat_ollama(system_prompt = prompt, model=model)
# Send the same post text to the chat object three times and keep all three
# answers in a list, so the preferred one can be picked by hand.
res <- lapply(seq_len(3L), \(i) chat$chat(post_text))
# Index of the preferred response: change this
best <- 1
# Print it out
# NOTE(review): no print statement follows this comment in the script.
# Upload result to GitHub Gist --------------------------------------------
# Assemble the gist body as markdown: the chosen model response under
# "Summary", followed by the bullet list of the first 10 posts under
# "Top posts". Sections are separated by blank lines.
gist_md <- paste(
  "## Summary",
  glue("_Produced with {model}_"),
  res[[best]],
  "## Top posts",
  "_Top 10 posts_:",
  post_bullets,
  sep = "\n\n"
)
# Create the gist; the file name and description both record the date window
# and the model name.
gist_create(
  filename = paste0(start_date, "-", end_date, "-bsky-summary-", model, ".md"),
  description = paste0(
    "Summary of #Rstats posts on bluesky from ", start_date,
    " to ", end_date, " using ", model
  ),
  code = gist_md
)
# Sentiment analysis on posts ---------------------------------------------
# Select the local ollama llama3.2 model for the llm_* helpers below.
# NOTE(review): seed and temperature = 0 are presumably set for reproducible
# output - confirm.
llm_use("ollama", "llama3.2", seed = 100, temperature = 0)

# Run three LLM passes over the post text. The later code reads the
# resulting `.sentiment`, `.summary` and `.translation` columns.
d <-
  d |>
  llm_sentiment(text) |>
  llm_summarize(text, max_words = 5) |>
  llm_translate(text, "spanish")

# Bar chart of how many posts fall into each sentiment class
d |>
  count(.sentiment) |>
  ggplot(aes(.sentiment, n)) +
  geom_col() +
  theme_classic() +
  labs(x="Sentiment", y="Count", title="Sentiment of #Rstats posts on Bluesky")
# Markdown bullet list for the first 10 posts: linked author, the short
# summary in parentheses, then the translated text.
smry_md <-
  glue("- [{d$author}]({d$url}) ({d$.summary}): {d$.translation}") |>
  head(10) |>
  paste(collapse = "\n")
# Upload the list as its own gist.
# NOTE(review): the file name and description use `model`, while the
# summaries and translations above were produced via
# llm_use("ollama", "llama3.2") - confirm `model` holds the matching name.
gist_create(
  filename = paste0(
    start_date, "-", end_date,
    "-bsky-post-summary-translation-", model, ".md"
  ),
  description = paste0(
    "Individual #Rstats post translations: ", start_date,
    " to ", end_date, " using ", model
  ),
  code = smry_md
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment