Skip to content

Instantly share code, notes, and snippets.

@reuning
reuning / NYT_pundits.r
Created July 22, 2024 17:21
NYT Pundits Ratings of Democratic Candidates
# Plot NYT pundit ratings of Democratic candidates on an
# electability-vs-excitement grid.
library(tidyverse)
library(ggpubr)
library(ggrepel)
# nyt_pundits.csv must be in the working directory; columns used below:
# Pundit, Candidate, Electable, Exciting.
data <- read_csv("nyt_pundits.csv")
# Per-pundit averages of the two rating dimensions.
sum_data <- data |> group_by(Pundit) |>
summarize(Avg_Elect=mean(Electable),
Avg_Excit=mean(Exciting))
# NOTE(review): the plot call is truncated in this snippet -- the
# layers following the trailing `+` are not visible here.
ggplot(data, aes(x=Electable, y=Exciting, color=Candidate)) +
@reuning
reuning / cook_pvi.R
Last active April 23, 2024 16:33
Calculates state level Cook PVI using their current weighting.
# Calculate state-level Cook Partisan Voting Index (PVI) from the
# MIT Election Lab presidential returns, using Cook's current weighting.
library(tidyverse)
## Data from: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910%2FDVN%2F42MVDX
df <- read_csv("1976-2020-president.csv")
# Keep only the two major parties, then reshape to one row per
# year-state with DEMOCRAT/REPUBLICAN vote-total columns.
# values_fn=sum collapses multiple rows per party (e.g. a candidate
# appearing on more than one ballot line).
cook_pvi <- df |> filter(party_simplified%in% c("DEMOCRAT", "REPUBLICAN")) |>
select(year, state, party_simplified, candidatevotes) |>
pivot_wider(id_cols=c(year,state),
names_from = party_simplified,
values_from=candidatevotes, values_fn=sum) |>
# NOTE(review): the pipeline is truncated here in this snippet.
@reuning
reuning / vac_vote_plot.R
Created July 31, 2021 23:16
Plot of vaccination vs. Biden support with outliers highlighted
# Plot county vaccination rates against 2020 Biden vote share
# (plotting code continues beyond this snippet).
# NOTE(review): setwd() in a script is fragile -- kept only because the
# local CSV below is read relative to ~/Downloads; prefer running the
# script from that directory instead.
setwd("~/Downloads/")
library(ggplot2)  # was library(ggplot): no such package exists; ggplot2 is correct
library(ggrepel)
library(data.table)
# CDC county-level vaccination data (Socrata CSV export).
df <- fread("https://data.cdc.gov/resource/8xkx-amqh.csv")
# MIT Election Lab county presidential returns.
df_election <- fread("countypres_2000-2020.csv") # https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/42MVDX
# Restrict to 2020 Democratic (Biden) totals and compute vote share (%).
df_election <- df_election[year==2020 & party == "DEMOCRAT" & mode=="TOTAL"]
df_election[,prop:=100*candidatevotes/totalvotes]
# CDC file stores FIPS as character; coerce so the merge keys match.
df[,fips:=as.numeric(fips)]
all <- merge(df, df_election, by.x="fips",by.y="county_fips")
@reuning
reuning / polling_election_margins.R
Created November 5, 2020 14:59
Compare polling and election margins in R. Data scraped from NYTimes results.
# Compare 2020 polling and election margins; scrapes the NYTimes
# live-results page for per-state result links.
library(rvest)
library(data.table)
library(ggplot2)
library(ggrepel)
page <- read_html("https://www.nytimes.com/interactive/2020/11/03/us/elections/results-president.html")
# href of every per-state results link inside the state-list element.
links <- page %>% html_node(".e-state-list") %>% html_nodes("a") %>% html_attr("href")
get_data <- function(link){
# NOTE(review): this listing appears truncated/garbled here --
# get_data's body is cut off (its closing brace is not visible), and
# the Iowa-caucus scraping below likely belongs to a separate gist
# whose header was lost; `link` is never used by it. Verify against
# the original gists.
library(rvest)
library(magrittr)
tmp <- read_html("https://results.thecaucuses.org/")
# Header row and sub-header row cells of the results table.
full <- tmp %>% html_nodes("ul.thead") %>% html_nodes("li") %>% html_text()
sub <- tmp %>% html_nodes("ul.sub-head") %>% html_nodes("li") %>% html_text()
# Drop the two leading non-data cells from each header row.
full <- full[-1:-2]
sub <- sub[-1:-2]
counties <- tmp %>% html_nodes('div.precinct-rows')
@reuning
reuning / polling_tweets.R
Last active January 22, 2020 16:59
Simple script to grab tweets with polling info and process them to see what leads to retweets.
# Grab @PpollingNumbers tweets with polling info and prepare them for
# analyzing what drives retweets.
library(rtweet)
library(data.table)
library(ggplot2)
library(stringr)
# n=3200 is the Twitter API's maximum timeline depth.
tweets <- as.data.table(get_timeline("PpollingNumbers", n=3200))
# Keep only original tweets (drop retweets).
tweets <- tweets[is_retweet!=TRUE,]
# Flag national Biden polls, excluding student polls.
# NOTE(review): prefer TRUE over T (T is reassignable); the boolean
# expression is truncated here in this snippet (trailing `&`).
tweets[,nat:=grepl("National", text, ignore.case = T) &
grepl("Biden", text, ignore.case = T) &
!grepl("students", text, ignore.case=T) &
@reuning
reuning / Stock_Tweets.R
Last active February 7, 2018 21:59
Uses the twitteR and SentimentAnalysis packages to grab tweets over the last 7 days mentioning 'stock market' and extract sentiment
# Pull recent 'stock market' tweets and score their sentiment
# (twitteR + SentimentAnalysis). OAuth credentials are read from the
# environment rather than left as empty placeholders, which would
# error at runtime and invite hard-coding secrets into the script.
library(twitteR)
library(tm)
library(SentimentAnalysis)
# Set these environment variables (e.g. in ~/.Renviron) before running.
setup_twitter_oauth(consumer_key = Sys.getenv("TWITTER_CONSUMER_KEY"),
consumer_secret = Sys.getenv("TWITTER_CONSUMER_SECRET"),
access_token = Sys.getenv("TWITTER_ACCESS_TOKEN"),
access_secret = Sys.getenv("TWITTER_ACCESS_SECRET"))
poll.conf <- function(url, inc.und=T){
library(rvest)
library(magrittr)
library(stringr)
poll.page <- html(url)
poll.r <- poll.page %>% html_node("table[class='poll-results-table']") %>% html_table()
poll.n <- poll.page %>% html_node("div[class='subpop-description']") %>% html_text()
tmp <- gregexpr("\\d", poll.n)[[1]]