Last active
January 15, 2023 09:36
-
-
Save TomBener/c3ebeb2c4da66d2a9a146c6925fe077b to your computer and use it in GitHub Desktop.
WeChat Moments Analysis with R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # WeChat Moments Analysis with R | |
| # `install.packages("package-name")` # nolint | |
| # Using Python in R Markdown | |
| # library(reticulate) | |
| # use_python("~/.pyenv/shims/python") | |
| # Import packages | |
| library(tufte) | |
| library(dplyr) | |
| library(tidyr) | |
| library(ggplot2) | |
| library(ggrepel) | |
| library(showtext) | |
| # Register the font file under the family name "lxgw" (used by every plot | |
| # below) and let showtext handle text rendering. | |
| font_add(family = "lxgw", regular = "LXGWWenKai-Regular.ttf") | |
| showtext_auto() | |
| # Read the raw records, then derive a second table with one row per entry | |
| # of `medium` (entries are split on a comma followed by whitespace). | |
| data <- read.csv(file = "moments.csv") | |
| sepdata <- data %>% | |
|   separate_rows(medium, sep = ",\\s") | |
| ########## Moments visibility duration | |
| # Keep one row per (name, time_of_visibility) pair so repeated rows with | |
| # the same name and setting are counted once. | |
| data0 <- data %>% | |
|   distinct(name, time_of_visibility) | |
| # Tally each visibility setting and format its share as a percentage label. | |
| df <- count(data0, time_of_visibility) %>% | |
|   mutate( | |
|     perc = n / sum(n), | |
|     labels = scales::percent(perc) | |
|   ) | |
| # Explicit level order for the fill factor (controls stacking and legend). | |
| visibility_levels <- c("三天", "一个月", "半年", "无时限") | |
| # Pie chart: a single stacked column drawn in polar coordinates. | |
| ggplot( | |
|   df, | |
|   aes( | |
|     x = "", | |
|     y = n, | |
|     fill = factor(time_of_visibility, levels = visibility_levels) | |
|   ) | |
| ) + | |
|   geom_col() + | |
|   # Labels sit at the middle of each stacked segment. | |
|   geom_label( | |
|     aes(label = labels), | |
|     position = position_stack(vjust = 0.5), | |
|     size = 3, | |
|     family = "lxgw", | |
|     show.legend = FALSE | |
|   ) + | |
|   coord_polar(theta = "y") + | |
|   guides(fill = guide_legend(title = "朋友圈可见时长")) + | |
|   theme_void() + | |
|   theme(text = element_text(family = "lxgw")) | |
| ########## Active groups in Moments (rows per relationship) | |
| # Count rows per relationship, add percentage labels, and compute the | |
| # mid-point of every slice (`text_y`) for the repelled labels. | |
| df <- | |
|   data %>% | |
|   count(relationship_with_me) %>% | |
|   mutate(perc = n / sum(n)) %>% | |
|   mutate(labels = scales::percent(perc)) %>% | |
|   # Convert to a factor BEFORE sorting: ggplot2 stacks slices by factor | |
|   # level, whereas arrange() on a character column may sort in a different | |
|   # locale (C locale in dplyr >= 1.1). Sorting the factor keeps `text_y` | |
|   # aligned with the slice each label belongs to. | |
|   mutate(relationship_with_me = factor(relationship_with_me)) %>% | |
|   arrange(desc(relationship_with_me)) %>% | |
|   # The last level is stacked first, so the running total in descending | |
|   # level order minus half the slice gives the slice centre. | |
|   mutate(text_y = cumsum(n) - n / 2) | |
| # Pie chart: a single stacked column drawn in polar coordinates. | |
| df %>% | |
|   ggplot(aes(x = "", y = n, fill = relationship_with_me)) + | |
|   geom_col() + | |
|   # reference: https://stackoverflow.com/a/69715619 | |
|   geom_label_repel(aes(label = labels, y = text_y), | |
|                    force = 0.5, | |
|                    nudge_x = 1, | |
|                    nudge_y = 0.5, | |
|                    size = 3, | |
|                    show.legend = FALSE, | |
|                    family = "lxgw") + | |
|   coord_polar(theta = "y") + | |
|   scale_fill_brewer(palette = "Pastel1") + | |
|   guides(fill = guide_legend(title = NULL)) + | |
|   theme_void() + | |
|   theme(text = element_text(family = "lxgw")) | |
| ########## Number of Moments posts per name | |
| # add_count() attaches the per-name row count as `count_name_occurr` | |
| # without leaving `df` grouped (the old group_by() + mutate() never | |
| # ungrouped). | |
| df <- data %>% | |
|   add_count(name, name = "count_name_occurr") | |
| # Horizontal bar chart, one bar per name, ordered by number of rows. | |
| ggplot(data = df, | |
|        aes(y = reorder(name, count_name_occurr), | |
|            fill = name)) + | |
|   geom_bar() + | |
|   # NOTE(review): tick positions are hard-coded up to 18; counts above that | |
|   # get no tick. Confirm against the data. | |
|   scale_x_continuous(breaks = seq(0, 18, 1)) + | |
|   theme_minimal() + | |
|   labs(x = "发朋友圈次数", y = "隐藏的姓名") + | |
|   # The name labels on the y axis are blanked out. | |
|   theme(text = element_text(family = "lxgw"), | |
|         axis.text.y = element_blank(), | |
|         legend.position = "none") | |
| ########## Media types posted | |
| # Uses `sepdata` (one row per medium entry). Count rows per medium, add | |
| # percentage labels, and compute the mid-point of every slice (`text_y`). | |
| df <- | |
|   sepdata %>% | |
|   count(medium) %>% | |
|   mutate(perc = n / sum(n)) %>% | |
|   mutate(labels = scales::percent(perc)) %>% | |
|   # Convert to a factor BEFORE sorting: ggplot2 stacks slices by factor | |
|   # level, whereas arrange() on a character column may sort in a different | |
|   # locale (C locale in dplyr >= 1.1). Sorting the factor keeps `text_y` | |
|   # aligned with the slice each label belongs to. | |
|   mutate(medium = factor(medium)) %>% | |
|   arrange(desc(medium)) %>% | |
|   mutate(text_y = cumsum(n) - n / 2) | |
| # Pie chart: a single stacked column drawn in polar coordinates. | |
| df %>% | |
|   ggplot(aes(x = "", y = n, fill = medium)) + | |
|   geom_col() + | |
|   geom_label_repel(aes(label = labels, y = text_y), | |
|                    force = 0.5, | |
|                    nudge_x = 0.8, | |
|                    nudge_y = 0.5, | |
|                    size = 3, | |
|                    show.legend = FALSE, | |
|                    family = "lxgw") + | |
|   coord_polar(theta = "y") + | |
|   scale_fill_brewer(palette = "Pastel2") + | |
|   guides(fill = guide_legend(title = NULL)) + | |
|   theme_void() + | |
|   theme(text = element_text(family = "lxgw")) | |
| ########## Posts per day | |
| # add_count() attaches the per-day row count as `count_day_occurr` | |
| # without leaving `df` grouped. | |
| df <- data %>% | |
|   add_count(day, name = "count_day_occurr") | |
| # Horizontal bar chart: one bar per day, ordered by number of rows. | |
| ggplot(data = df, | |
|        aes(y = reorder(day, count_day_occurr), | |
|            fill = day)) + | |
|   geom_bar() + | |
|   scale_x_continuous(breaks = seq(0, 26, 4)) + | |
|   theme_minimal() + | |
|   labs(x = "每天的发布次数") + | |
|   theme(text = element_text(family = "lxgw"), | |
|         axis.title.y = element_blank(), | |
|         legend.position = "none") | |
| # Line chart: reduce to one row per day first, so each point and line | |
| # vertex is drawn once instead of once per post. | |
| daily_counts <- distinct(df, day, count_day_occurr) | |
| ggplot(data = daily_counts, | |
|        aes(x = day, | |
|            y = count_day_occurr, | |
|            group = 1)) + | |
|   geom_line(color = "blue") + | |
|   geom_point() + | |
|   scale_y_continuous(breaks = seq(0, 26, 4)) + | |
|   theme_minimal() + | |
|   # The count is on the y axis here, so the count label goes on y and the | |
|   # x (day) title is the one left blank. | |
|   labs(y = "每天的发布次数") + | |
|   theme(text = element_text(family = "lxgw"), | |
|         axis.title.x = element_blank(), | |
|         axis.text.x = element_text(angle = 90, hjust = 1), | |
|         legend.position = "none") | |
| ########## Distribution across hours of the day | |
| # add_count() attaches the per-hour row count as `count_hour_occurr` | |
| # without leaving `df` grouped. | |
| df <- data %>% | |
|   add_count(hour, name = "count_hour_occurr") | |
| # Horizontal bar chart: one bar per hour value, ordered by number of rows. | |
| ggplot(data = df, | |
|        aes(y = reorder(hour, count_hour_occurr), | |
|            fill = hour)) + | |
|   geom_bar() + | |
|   scale_x_continuous(breaks = seq(0, 26, 4)) + | |
|   theme_minimal() + | |
|   labs(x = "一天中的时间分布") + | |
|   theme(text = element_text(family = "lxgw"), | |
|         axis.title.y = element_blank(), | |
|         legend.position = "none") | |
| # Line chart: reduce to one row per hour first, so each point and line | |
| # vertex is drawn once instead of once per post. | |
| hourly_counts <- distinct(df, hour, count_hour_occurr) | |
| ggplot(data = hourly_counts, | |
|        aes(x = hour, y = count_hour_occurr, | |
|            group = 1)) + | |
|   geom_line(color = "blue") + | |
|   geom_point() + | |
|   scale_y_continuous(breaks = seq(0, 26, 4)) + | |
|   theme_minimal() + | |
|   labs(x = "一天中的时间分布") + | |
|   theme(text = element_text(family = "lxgw"), | |
|         axis.title.y = element_blank(), | |
|         axis.text.x = element_text(angle = 90, hjust = 1), | |
|         legend.position = "none") | |
| ########## Context | |
| # Count rows per `context` value and attach a percentage label to each. | |
| df <- count(data, context) %>% | |
|   mutate( | |
|     perc = n / sum(n), | |
|     labels = scales::percent(perc) | |
|   ) | |
| # Pie chart: a single stacked column drawn in polar coordinates, with the | |
| # label placed at the middle of every segment. | |
| ggplot(df, aes(x = "", y = n, fill = context)) + | |
|   geom_col() + | |
|   geom_label( | |
|     aes(label = labels), | |
|     position = position_stack(vjust = 0.5), | |
|     size = 3, | |
|     family = "lxgw", | |
|     show.legend = FALSE | |
|   ) + | |
|   coord_polar(theta = "y") + | |
|   guides(fill = guide_legend(title = "背景信息")) + | |
|   theme_void() + | |
|   theme(text = element_text(family = "lxgw")) | |
| ########## Property | |
| # Count rows per `property` value and attach a percentage label to each. | |
| df <- count(data, property) %>% | |
|   mutate( | |
|     perc = n / sum(n), | |
|     labels = scales::percent(perc) | |
|   ) | |
| # Pie chart: a single stacked column drawn in polar coordinates, with the | |
| # label placed at the middle of every segment. | |
| ggplot(df, aes(x = "", y = n, fill = property)) + | |
|   geom_col() + | |
|   geom_label( | |
|     aes(label = labels), | |
|     position = position_stack(vjust = 0.5), | |
|     size = 3, | |
|     family = "lxgw", | |
|     show.legend = FALSE | |
|   ) + | |
|   coord_polar(theta = "y") + | |
|   guides(fill = guide_legend(title = "属性")) + | |
|   theme_void() + | |
|   theme(text = element_text(family = "lxgw")) | |
| ########## Emotion | |
| # Count rows per `emotional_level` value and attach a percentage label. | |
| df <- count(data, emotional_level) %>% | |
|   mutate( | |
|     perc = n / sum(n), | |
|     labels = scales::percent(perc) | |
|   ) | |
| # Pie chart: a single stacked column drawn in polar coordinates, with the | |
| # label placed at the middle of every segment. | |
| ggplot(df, aes(x = "", y = n, fill = emotional_level)) + | |
|   geom_col() + | |
|   geom_label( | |
|     aes(label = labels), | |
|     position = position_stack(vjust = 0.5), | |
|     size = 2.5, | |
|     family = "lxgw", | |
|     show.legend = FALSE | |
|   ) + | |
|   coord_polar(theta = "y") + | |
|   guides(fill = guide_legend(title = "情绪")) + | |
|   theme_void() + | |
|   theme(text = element_text(family = "lxgw")) | |
| ########## Number of images | |
| # add_count() attaches how many rows share each image count as | |
| # `count_image_occurr`, without leaving `df` grouped. | |
| df <- data %>% | |
|   add_count(number_of_images, name = "count_image_occurr") | |
| # Bar chart: image counts on the x axis, most frequent value first. | |
| ggplot(data = df, | |
|        aes(x = reorder(number_of_images, | |
|                        -count_image_occurr), | |
|            fill = factor(number_of_images))) + | |
|   geom_bar() + | |
|   scale_y_continuous(breaks = seq(0, 100, 10)) + | |
|   theme_minimal() + | |
|   labs(x = "图片数量") + | |
|   theme(text = element_text(family = "lxgw"), | |
|         axis.title.y = element_blank(), | |
|         legend.position = "none") | |
| # Same distribution as shares: count rows per value and format percentages. | |
| df <- | |
|   data %>% | |
|   count(number_of_images) %>% | |
|   mutate(perc = n / sum(n)) %>% | |
|   mutate(labels = scales::percent(perc)) | |
| # Pie chart: a single stacked column drawn in polar coordinates. | |
| df %>% | |
|   ggplot(aes(x = "", y = n, | |
|              fill = factor(number_of_images))) + | |
|   geom_col() + | |
|   geom_label(aes(label = labels), | |
|              position = position_stack(vjust = 0.5), | |
|              size = 2.3, | |
|              family = "lxgw", | |
|              show.legend = FALSE) + | |
|   coord_polar(theta = "y") + | |
|   guides(fill = guide_legend(title = "图片数量")) + | |
|   theme_void() + | |
|   theme(text = element_text(family = "lxgw")) | |
| ########## Number of likes | |
| # add_count() attaches how many rows share each like count as | |
| # `count_like_occurr`, without leaving `df` grouped. | |
| df <- data %>% | |
|   add_count(number_of_likes, name = "count_like_occurr") | |
| # Bar chart: like counts on the x axis, most frequent value first. | |
| ggplot(data = df, | |
|        aes(x = reorder(number_of_likes, | |
|                        -count_like_occurr), | |
|            fill = factor(number_of_likes))) + | |
|   geom_bar() + | |
|   scale_y_continuous(breaks = seq(0, 170, 10)) + | |
|   theme_minimal() + | |
|   labs(x = "点赞数量") + | |
|   theme(text = element_text(family = "lxgw"), | |
|         axis.title.y = element_blank(), | |
|         legend.position = "none") | |
| # Same distribution as shares. `text_y` is the mid-point of each slice: | |
| # slices stack from the highest value upward, so the running total in | |
| # descending order minus half the slice gives its centre. | |
| df <- | |
|   data %>% | |
|   count(number_of_likes) %>% | |
|   mutate(perc = n / sum(n)) %>% | |
|   mutate(labels = scales::percent(perc)) %>% | |
|   arrange(desc(number_of_likes)) %>% | |
|   mutate(text_y = cumsum(n) - n / 2) | |
| # Pie chart with repelled labels anchored at the slice centres. | |
| df %>% | |
|   ggplot(aes(x = "", y = n, | |
|              fill = factor(number_of_likes))) + | |
|   geom_col() + | |
|   geom_label_repel(aes(label = labels, y = text_y), | |
|                    force = 0.5, | |
|                    nudge_x = 1, | |
|                    nudge_y = 0.5, | |
|                    size = 3, | |
|                    show.legend = FALSE, | |
|                    family = "lxgw") + | |
|   coord_polar(theta = "y") + | |
|   guides(fill = guide_legend(title = "点赞数量")) + | |
|   theme_void() + | |
|   theme(text = element_text(family = "lxgw")) | |
| ########## Number of comments | |
| # add_count() attaches how many rows share each comment count as | |
| # `count_comment_occurr`, without leaving `df` grouped. | |
| df <- data %>% | |
|   add_count(number_of_comments, name = "count_comment_occurr") | |
| # Bar chart: comment counts on the x axis, most frequent value first. | |
| ggplot(data = df, | |
|        aes(x = reorder(number_of_comments, | |
|                        -count_comment_occurr), | |
|            fill = factor(number_of_comments))) + | |
|   geom_bar() + | |
|   scale_y_continuous(breaks = seq(0, 220, 10)) + | |
|   theme_minimal() + | |
|   labs(x = "评论数量") + | |
|   theme(text = element_text(family = "lxgw"), | |
|         axis.title.y = element_blank(), | |
|         legend.position = "none") | |
| # Same distribution as shares. `text_y` is the mid-point of each slice: | |
| # slices stack from the highest value upward, so the running total in | |
| # descending order minus half the slice gives its centre. | |
| df <- | |
|   data %>% | |
|   count(number_of_comments) %>% | |
|   mutate(perc = n / sum(n)) %>% | |
|   mutate(labels = scales::percent(perc)) %>% | |
|   arrange(desc(number_of_comments)) %>% | |
|   mutate(text_y = cumsum(n) - n / 2) | |
| # Pie chart with repelled labels anchored at the slice centres. | |
| df %>% | |
|   ggplot(aes(x = "", y = n, | |
|              fill = factor(number_of_comments))) + | |
|   geom_col() + | |
|   geom_label_repel(aes(label = labels, y = text_y), | |
|                    force = 0.5, | |
|                    nudge_x = 1, | |
|                    nudge_y = 0.5, | |
|                    size = 3, | |
|                    show.legend = FALSE, | |
|                    family = "lxgw") + | |
|   coord_polar(theta = "y") + | |
|   guides(fill = guide_legend(title = "评论数量")) + | |
|   theme_void() + | |
|   theme(text = element_text(family = "lxgw")) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Keep in mind that you must create a file named `moments.csv`, containing the formatted data, before running the code.
The `moments.csv` file is like:
Then you can execute the code by running the command below:
A 17-page PDF file named `Rplots.pdf` will be generated in the current directory.