Skip to content

Instantly share code, notes, and snippets.

@allatambov
Created May 29, 2020 00:20
Show Gist options
  • Save allatambov/7cbf378c1fd15aadc8001c24d6569b31 to your computer and use it in GitHub Desktop.
Save allatambov/7cbf378c1fd15aadc8001c24d6569b31 to your computer and use it in GitHub Desktop.
# Drive a Firefox session through RSelenium, open Twitter's explore page and
# run a date-bounded search for a keyword.
# install.packages("RSelenium")
library(RSelenium)

# Start the driver and keep a handle on its client. `FALSE` is spelled out
# because `F` is an ordinary variable that can be reassigned, while `FALSE`
# is a reserved word.
rd <- rsDriver(browser = "firefox", check = FALSE)
browser <- rd$client
browser$navigate("https://twitter.com/explore")

# Locate the search box and submit a first, plain keyword search.
input <- browser$findElement(
  using = "xpath",
  "//input[@enterkeyhint='search']"
)
input$sendKeysToElement(list("$python", key = "enter"))

# Assemble the bounded query, which has the form:
# $python since:2020-05-20 until:2020-05-25
keyword <- "$python"
since <- "2020-05-20"
until <- "2020-05-25"
query <- sprintf("%s since:%s until:%s", keyword, since, until)
query

# Empty the search box and submit the bounded query in place of the first one.
input$clearElement()
input$sendKeysToElement(list(query, key = "enter"))
# Collect every element the page marks as a tweet, then inspect the first one
# by hand to work out how to pull the individual fields from it.
tws <- browser$findElements(using = "xpath", "//div[@data-testid = 'tweet']")
tw0 <- tws[[1]]
str(tw0)
tw0$getElementText()
tw0$getElementAttribute("innerHTML")

# Keep the tweet's inner HTML so it can be parsed offline with rvest.
html <- tw0$getElementAttribute("innerHTML")
library(rvest)

# Timestamp: the `datetime` attribute of the first <time> node.
date <- head(
  html_attr(html_node(read_html(html[[1]]), "time"), "datetime"),
  1
)

# Counters: text of the <div> nodes tagged reply / retweet / like.
reply <- html_text(
  html_nodes(read_html(html[[1]]), xpath = "//div[@data-testid = 'reply']")
)
retweet <- html_text(
  html_nodes(read_html(html[[1]]), xpath = "//div[@data-testid = 'retweet']")
)
like <- html_text(
  html_nodes(read_html(html[[1]]), xpath = "//div[@data-testid = 'like']")
)

# Visible text of the whole tweet element, as reported by the browser.
text <- tw0$getElementText()[[1]]
# Extract the fields of one tweet element.
#
# Args:
#   tw0: an element object exposing `getElementAttribute()` and
#        `getElementText()`, such as one item of `browser$findElements()`.
#
# Returns:
#   A named character vector with exactly five entries, in this order:
#   `date`, `reply`, `retweet`, `like`, `text`. A field whose node is not
#   present in the tweet's HTML is `NA` rather than being dropped.
get_tweet <- function(tw0) {
  inner <- tw0$getElementAttribute("innerHTML")
  # Parse the markup once and query the same document for every field.
  doc <- read_html(inner[[1]])

  # Text of the first <div> carrying the given data-testid. html_node()
  # yields a single (possibly missing) node, so the result always has
  # length one; html_nodes() could yield zero or several values here.
  first_text <- function(testid) {
    xp <- sprintf("//div[@data-testid = '%s']", testid)
    html_text(html_node(doc, xpath = xp))
  }

  c(
    # First <time> node only: a tweet's HTML can hold more than one, which
    # would otherwise produce `date1`, `date2`, ... and a longer vector.
    date = html_attr(html_node(doc, "time"), "datetime"),
    reply = first_text("reply"),
    retweet = first_text("retweet"),
    like = first_text("like"),
    text = tw0$getElementText()[[1]]
  )
}
# Scrape every tweet element found so far; at the console this prints the
# resulting list of per-tweet vectors.
lapply(tws, get_tweet)

# Scrape them again and reshape: each tweet's vector becomes a column, and
# t() flips the table so that each tweet ends up as one row.
twee <- lapply(tws, get_tweet) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame()

# seq_len() rather than 1:nrow(): for an empty table 1:0 is c(1, 0), which is
# not a valid set of row names and makes the assignment fail.
rownames(twee) <- seq_len(nrow(twee))
# Scroll part of the way down, then show the document's current height.
browser$executeScript("window.scrollTo(0, 3200)")
browser$executeScript("return document.body.scrollHeight")

# Scroll to the bottom repeatedly, scraping after each scroll, and stop once
# a scroll no longer changes the document height.
last_height <- browser$executeScript("return document.body.scrollHeight")[[1]]
all_tweets <- NULL
repeat {
  browser$executeScript("window.scrollTo(0, document.body.scrollHeight)")
  # Fixed pause before measuring the page and looking for tweets again.
  Sys.sleep(4)
  new_height <- browser$executeScript("return document.body.scrollHeight")[[1]]

  # Every pass scrapes all elements matching the XPath at that moment, so a
  # tweet that is still on the page may be appended more than once.
  tweets <- browser$findElements(
    using = "xpath",
    "//div[@data-testid = 'tweet']"
  )
  # Note: this reuses the name `twee`, replacing whatever it held before.
  twee <- lapply(tweets, get_tweet)
  all_tweets <- append(all_tweets, twee)

  if (new_height == last_height) {
    break
  }
  last_height <- new_height
}
all_tweets
# Bring a scraped record to the expected shape.
#
# Args:
#   v: a vector (one scraped tweet record).
#
# Returns:
#   `v` unchanged when it has exactly five elements; otherwise `v` with its
#   second element removed.
my_check <- function(v) {
  if (length(v) == 5) {
    return(v)
  }
  v[-2]
}
# Pass every scraped record through my_check(), then bind the records into a
# data frame with one row per tweet.
res <- lapply(all_tweets, my_check)
final <- res %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame()

# seq_len() rather than 1:nrow(): for an empty table 1:0 is c(1, 0), which is
# not a valid set of row names and makes the assignment fail.
rownames(final) <- seq_len(nrow(final))
str(final)

# Convert the three counters to numbers. Values that do not parse as numbers
# (for instance an empty string) become NA and are then set to 0. The
# zero-fill is limited to these columns so that a missing date or text is
# not overwritten with 0.
# NOTE(review): if the page abbreviates large counts (e.g. "1.2K"), they
# also coerce to NA and end up as 0 here - confirm against real data.
count_cols <- c("reply", "retweet", "like")
final[count_cols] <- lapply(final[count_cols], function(x) {
  x <- as.numeric(as.character(x))
  x[is.na(x)] <- 0
  x
})

library(tidyverse)

# First "@" and everything after it up to the end of that line of the text.
str_extract(final$text, pattern = "@.+")
final <- final %>% mutate(user = str_extract(text, pattern = "@.+"))

# Experiments with a pattern made of: a middle dot, whitespace, one or two
# digits, whitespace, any run of characters, whitespace.
# NOTE(review): presumably aimed at the date stamp shown in the tweet header
# - confirm.
str_extract(final$text, "·\\s[0-9]{1,2}\\s.+\\s")
str_split(final$text, "·\\s[0-9]{1,2}\\s.+\\s")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment