Skip to content

Instantly share code, notes, and snippets.

@allatambov
Last active May 29, 2020 12:34
Show Gist options
  • Save allatambov/63716cfd75545f8bc84a8f3b227aeb42 to your computer and use it in GitHub Desktop.
Save allatambov/63716cfd75545f8bc84a8f3b227aeb42 to your computer and use it in GitHub Desktop.
# Scrape tweets from a Twitter search with RSelenium: start a Chrome session,
# submit a dated search query, then inspect the first tweet on the result page.
library(RSelenium)
library(rvest)

# Start a Selenium server together with a Chrome client.
# NOTE(review): extraCapabilities is an unnamed list holding a chromedriver
# path; confirm that RSelenium actually picks the driver up from it.
rd <- rsDriver(
  browser = "chrome",
  extraCapabilities = list("/Users/allat/Desktop/chromedriver"),
  check = FALSE,
  chromever = "83.0.4103.14"
)
browser <- rd$client
browser$navigate("https://twitter.com/explore")

# Build the search query, e.g.:
# $python since:2020-05-20 until:2020-05-25
keyword <- "$python"
since <- "2020-05-20"
until <- "2020-05-25"
query <- sprintf("%s since:%s until:%s", keyword, since, until)
query

# Type the query into the search box and submit it with Enter.
input <- browser$findElement(
  using = "xpath",
  "//input[@enterkeyhint='search']"
)
input$sendKeysToElement(list(query, key = "enter"))

# Give the result page time to render before looking for tweets (same pause
# the scrolling loop further down uses); without it a non-interactive run
# presumably finds nothing.
Sys.sleep(4)
tws <- browser$findElements(using = "xpath", "//div[@data-testid = 'tweet']")
if (length(tws) == 0) {
  # Fail with a readable message instead of "subscript out of bounds" below.
  stop("No tweets found on the result page for query: ", query, call. = FALSE)
}

# Explore the first tweet element.
tw0 <- tws[[1]]
str(tw0)
tw0$getElementText()
tw0$getElementAttribute("innerHTML")

# Parse the tweet's inner HTML once and pull the individual fields out of it.
html <- tw0$getElementAttribute("innerHTML")
tweet_doc <- read_html(html[[1]])
date <- tweet_doc %>%
  html_nodes("time") %>%
  html_attr("datetime")
reply <- tweet_doc %>%
  html_nodes(xpath = "//div[@data-testid = 'reply']") %>%
  html_text()
like <- tweet_doc %>%
  html_nodes(xpath = "//div[@data-testid = 'like']") %>%
  html_text()
retweet <- tweet_doc %>%
  html_nodes(xpath = "//div[@data-testid = 'retweet']") %>%
  html_text()
# Call the method (the original stored the method object itself) and unwrap
# the one-element list it returns, matching what get_tweet() does.
text <- tw0$getElementText()[[1]]
#' Extract the main fields of one tweet element
#'
#' Reads the element's inner HTML, parses it once, and pulls out the timestamp
#' and the text of the reply / retweet / like blocks, plus the element's
#' visible text.
#'
#' @param tw0 A web element exposing `getElementAttribute()` and
#'   `getElementText()` (as returned by `browser$findElements()`).
#' @return A named character vector with exactly five elements: `date`,
#'   `reply`, `retweet`, `like`, `text`. A field whose node is absent from the
#'   HTML is `NA`; when several nodes match, the first one is used. This keeps
#'   every result the same shape so results can be stacked into a table.
get_tweet <- function(tw0) {
  html <- tw0$getElementAttribute("innerHTML")
  # Parse once and reuse the document for every field.
  doc <- read_html(html[[1]])

  # Reduce a 0..n length result to exactly one value.
  first_or_na <- function(x) {
    if (length(x) > 0) x[[1]] else NA_character_
  }

  # Text of the first <div data-testid = '...'> block, or NA if there is none.
  testid_text <- function(testid) {
    doc %>%
      html_nodes(xpath = sprintf("//div[@data-testid = '%s']", testid)) %>%
      html_text() %>%
      first_or_na()
  }

  date <- doc %>%
    html_nodes("time") %>%
    html_attr("datetime") %>%
    first_or_na()

  c(
    date = date,
    reply = testid_text("reply"),
    retweet = testid_text("retweet"),
    like = testid_text("like"),
    text = tw0$getElementText()[[1]]
  )
}
# Turn the tweets found so far into a data frame with one row per tweet.
twee <- lapply(tws, get_tweet) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame()
# seq_len() stays valid when there are zero rows (1:0 would yield c(1, 0)).
rownames(twee) <- seq_len(nrow(twee))

# Scrolling demo: jump down the page and read the current page height.
browser$executeScript("window.scrollTo(0, 3200)")
browser$executeScript("return document.body.scrollHeight")

# Keep scrolling to the bottom and scraping until the page stops growing.
last_height <- browser$executeScript("return document.body.scrollHeight")[[1]]
all_tweets <- list()
repeat {
  browser$executeScript("window.scrollTo(0, document.body.scrollHeight)")
  # Wait for newly loaded tweets to render before measuring and scraping.
  Sys.sleep(4)
  new_height <- browser$executeScript(
    "return document.body.scrollHeight"
  )[[1]]
  tweets <- browser$findElements(
    using = "xpath",
    "//div[@data-testid = 'tweet']"
  )
  # Use a separate name so the `twee` data frame built above is not clobbered.
  batch <- lapply(tweets, get_tweet)
  all_tweets <- c(all_tweets, batch)
  # Unchanged height after scrolling means nothing more was loaded.
  if (new_height == last_height) {
    break
  }
  last_height <- new_height
}
# Each pass re-scrapes every tweet still present in the page, so the same
# tweet can be collected several times; keep a single copy of each.
all_tweets <- unique(all_tweets)
all_tweets
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment