Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
2017-12-20-SWU-Analysis
```{r setup, include=FALSE}
# Global knitr chunk defaults for this report.
knitr::opts_chunk$set(
  collapse = TRUE,
  cache = FALSE,
  # Previously `fig.pos = "center"`: fig.pos is the LaTeX float placement
  # specifier (e.g. "H", "htbp"); horizontal centring is set via fig.align.
  fig.align = "center",
  comment = "##",
  fig.retina = 2,
  fig.path = "img/SWU_Analysis/"
)

# rvest is attached after tidyverse on purpose: the attach order is kept
# as in the original so that name masking stays the same.
library(tidyverse)
library(rvest)

# Enlarge axis and legend text in the currently active ggplot2 theme so that
# every plot below picks the sizes up automatically.
theme_update(
  axis.text = element_text(size = 14),
  axis.title = element_text(size = 14, face = "bold"),
  legend.text = element_text(size = 12),
  legend.title = element_text(size = 12, face = "bold")
)
```
```{r}
# Comment pages are addressed by an offset in the URL; offsets run from 0 to
# 1710 in steps of 30 (presumably 30 comments per page -- confirm on the site).
site <- seq(0, 1710, 30)
url <- paste0(
  "https://www.starwars-union.de/nachrichten/18973/",
  "SWU-Kritiken-Unsere-Gedanken-zu-Star-Wars-Die-letzten-Jedi/k/",
  site,
  "/#kommentare"
)

# Download one comment page and return its user names, timestamps and
# comment texts as a list of three character vectors.
scrape_page <- function(page_url) {
  comment_area <- read_html(page_url) %>%
    html_nodes(xpath = '//*[@id="kommentargesamt"]')

  authors <- comment_area %>%
    html_nodes("#kommentar_benutzer") %>%
    html_nodes("p") %>%
    html_text()

  paragraphs <- comment_area %>%
    html_nodes("#kommentar") %>%
    html_nodes("p") %>%
    html_text()

  # Paragraphs alternate: odd positions are taken as timestamps, even
  # positions as comment bodies. Splitting by parity (instead of
  # seq(1, length(x), 2)) also works when a page yields 0 or 1 paragraphs.
  is_odd <- seq_along(paragraphs) %% 2 == 1

  list(
    user = authors,
    time = paragraphs[is_odd],
    comments = paragraphs[!is_odd]
  )
}

# Scrape every page once, then concatenate each field across pages in one
# step rather than growing three vectors inside a loop.
pages <- lapply(url, scrape_page)

# Concatenate one field over all pages; as.character() keeps the result a
# character vector even when nothing was scraped (unlist() would give NULL).
collect_field <- function(field) {
  as.character(unlist(lapply(pages, `[[`, field), use.names = FALSE))
}

user <- collect_field("user")
time <- collect_field("time")
comments <- collect_field("comments")
```
```{r}
# Convert the scraped timestamp strings (day-month-year hour:minute order)
# into date-times, then wrap them in a one-column data frame for plotting.
time <- lubridate::dmy_hm(time)
df_time <- data.frame(time)
```
```{r time_analysis}
# Histogram of comment timestamps. For date-time data the bin width is given
# in seconds, so 3600 yields one bar per hour.
ggplot(df_time, aes(x = time)) +
  geom_histogram(binwidth = 3600, center = 1) +
  labs(x = "Date", y = "Comments per hour") +
  # Tilt the date labels so neighbouring ticks do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r users_analysis}
# Bar chart of the most active commenters (more than 20 comments each),
# ordered from most to fewest comments.
# Note: from here on `user` is a one-column data frame, not a character vector.
user <- as.data.frame(user)
user %>%
  group_by(user) %>%
  summarise(n = n()) %>%
  filter(n > 20) %>%
  ggplot() +
  # Fixed: the colour string was missing its closing quote ("red3), which
  # made the whole chunk unparseable. geom_col() is the idiomatic spelling of
  # geom_bar(stat = "identity").
  geom_col(aes(x = reorder(user, desc(n)), y = n), fill = "red3") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylab("Comments") +
  xlab("User")
# The former scale_fill_brewer(palette = "Paired") was dropped: fill is a
# constant here, not a mapped aesthetic, so the scale never had any effect.
```
```{r time_analysis_per_user}
# Count comments per (day of month, hour of day) combination.
# NOTE(review): only the day of the month is kept, so comments from
# different months with the same day number would be merged -- confirm the
# scraped period stays within one month.
user_time <- data.frame(
  user = user,
  time = time
) %>%
  mutate(
    day = lubridate::day(time),
    hour = lubridate::hour(time)
  ) %>%
  group_by(day, hour) %>%
  summarise(n = n()) %>%
  # summarise() only peels off the last grouping level; drop the remaining
  # grouping by `day` so later code receives a plain, ungrouped table.
  ungroup()
```
```{r, comments_hour}
# Stacked bars of comment counts per hour of day, one colour per calendar day.
g <- ggplot(user_time) +
  geom_col(
    aes(x = hour, y = n, fill = as.factor(day)),
    # Reverse the stacking order so it runs in the same order as the legend.
    position = position_stack(reverse = TRUE)
  ) +
  ylab("comments") +
  scale_fill_brewer(
    palette = "Paired",
    name = "Day",
    # Build the legend labels from the days actually present instead of a
    # hard-coded 12:20 vector, which fails or mislabels when the data cover
    # other days. Also spell the argument `labels` in full rather than
    # relying on partial matching of `label`.
    labels = function(day) paste0(day, ".12.2017")
  )
g
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.