Skip to content

Instantly share code, notes, and snippets.

@TomBener
Last active January 15, 2023 09:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TomBener/c3ebeb2c4da66d2a9a146c6925fe077b to your computer and use it in GitHub Desktop.
Save TomBener/c3ebeb2c4da66d2a9a146c6925fe077b to your computer and use it in GitHub Desktop.
WeChat Moments Analysis with R
# WeChat Moments Analysis with R
# `install.packages("package-name")` # nolint
# Using Python in R Markdown
# library(reticulate)
# use_python("~/.pyenv/shims/python")
# Import packages
library(tufte)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(showtext)
# Register the font file under the family name "lxgw" (the family every plot
# below asks for) and switch showtext on for graphics devices.
font_add(family = "lxgw", regular = "LXGWWenKai-Regular.ttf")
showtext_auto()

# Load the data
# `data` is moments.csv as read; `sepdata` is the same table with the
# `medium` column split on "comma + whitespace", one medium per row.
data <- read.csv(file = "moments.csv")
sepdata <- data %>%
  separate_rows(medium, sep = ",\\s")
########## Moments visibility duration
# Pie chart of the visibility setting, counted over distinct
# (name, time_of_visibility) combinations rather than over individual posts.
visibility_by_person <- distinct(data, name, time_of_visibility)

visibility_share <- visibility_by_person %>%
  count(time_of_visibility) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  )

# Explicit level order for the slices and the legend.
visibility_levels <- c("三天", "一个月", "半年", "无时限")

ggplot(
  visibility_share,
  aes(
    x = "",
    y = n,
    fill = factor(time_of_visibility, levels = visibility_levels)
  )
) +
  geom_col() +
  # Percentage label centred inside each slice.
  geom_label(
    aes(label = labels),
    position = position_stack(vjust = 0.5),
    size = 3,
    family = "lxgw",
    show.legend = FALSE
  ) +
  # Polar transform of the single stacked bar turns it into a pie.
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "朋友圈可见时长")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
########## Who is active on Moments
# Pie chart of the share of posts per `relationship_with_me` category, with
# repelled percentage labels placed outside the slices.
df <-
  data %>%
  count(relationship_with_me) %>%
  mutate(
    # Pin the category order as factor levels before sorting. ggplot2 stacks
    # slices in factor-level order, while arrange() on a plain character
    # column sorts in the C locale (dplyr >= 1.1.0). For non-ASCII categories
    # the two orders can disagree, which would put labels on the wrong
    # slices. Sorting the factor keeps both in the same order.
    relationship_with_me = factor(relationship_with_me),
    perc = n / sum(n),
    labels = scales::percent(perc)
  ) %>%
  # Walk the slices from the bottom of the stack upwards so that
  # cumsum(n) - n / 2 is the midpoint of each slice.
  arrange(desc(relationship_with_me)) %>%
  mutate(text_y = cumsum(n) - n / 2)

df %>%
  ggplot(aes(x = "", y = n, fill = relationship_with_me)) +
  geom_col() +
  # reference: https://stackoverflow.com/a/69715619
  geom_label_repel(
    aes(label = labels, y = text_y),
    force = 0.5,
    nudge_x = 1,
    nudge_y = 0.5,
    size = 3,
    show.legend = FALSE,
    family = "lxgw"
  ) +
  # Polar transform of the single stacked bar turns it into a pie.
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Pastel1") +
  guides(fill = guide_legend(title = NULL)) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
########## Number of posts per person
# Horizontal bar chart with one bar per name, ordered by number of posts.
# add_count() attaches the per-name row count without leaving the data frame
# grouped (the previous group_by() + mutate(n()) never ungrouped it).
df <- data %>%
  add_count(name, name = "count_name_occurr")

# geom_bar() counts the rows per name itself; count_name_occurr is only used
# to order the bars.
ggplot(
  data = df,
  aes(y = reorder(name, count_name_occurr), fill = name)
) +
  geom_bar() +
  # Fixed ticks at every integer from 0 to 18.
  scale_x_continuous(breaks = seq(0, 18, 1)) +
  theme_minimal() +
  labs(x = "发朋友圈次数", y = "隐藏的姓名") +
  theme(
    text = element_text(family = "lxgw"),
    # The names themselves are not drawn on the axis.
    axis.text.y = element_blank(),
    legend.position = "none"
  )
########## Media types posted
# Pie chart of the share of each medium, based on `sepdata` (one row per
# post/medium pair), with repelled percentage labels.
df <-
  sepdata %>%
  count(medium) %>%
  mutate(
    # Pin the category order as factor levels before sorting. ggplot2 stacks
    # slices in factor-level order, while arrange() on a plain character
    # column sorts in the C locale (dplyr >= 1.1.0). For non-ASCII categories
    # the two orders can disagree, which would put labels on the wrong
    # slices. Sorting the factor keeps both in the same order.
    medium = factor(medium),
    perc = n / sum(n),
    labels = scales::percent(perc)
  ) %>%
  # Walk the slices from the bottom of the stack upwards so that
  # cumsum(n) - n / 2 is the midpoint of each slice.
  arrange(desc(medium)) %>%
  mutate(text_y = cumsum(n) - n / 2)

df %>%
  ggplot(aes(x = "", y = n, fill = medium)) +
  geom_col() +
  geom_label_repel(
    aes(label = labels, y = text_y),
    force = 0.5,
    nudge_x = 0.8,
    nudge_y = 0.5,
    size = 3,
    show.legend = FALSE,
    family = "lxgw"
  ) +
  # Polar transform of the single stacked bar turns it into a pie.
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Pastel2") +
  guides(fill = guide_legend(title = NULL)) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
########## Posts per day
# Two views of the number of posts per `day`: a horizontal bar chart ordered
# by count, and a line chart over the days.

# add_count() attaches the per-day row count without leaving the data frame
# grouped (the previous group_by() + mutate(n()) never ungrouped it).
df <- data %>%
  add_count(day, name = "count_day_occurr")

# geom_bar() counts the rows per day itself; count_day_occurr is only used to
# order the bars.
ggplot(
  data = df,
  aes(y = reorder(day, count_day_occurr), fill = day)
) +
  geom_bar() +
  # Fixed ticks every 4 posts (0, 4, ..., 24).
  scale_x_continuous(breaks = seq(0, 26, 4)) +
  theme_minimal() +
  labs(x = "每天的发布次数") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# One row per day for the line chart, so each point is drawn once instead of
# once per post.
daily_counts <- count(data, day, name = "count_day_occurr")

ggplot(
  data = daily_counts,
  aes(x = day, y = count_day_occurr, group = 1)
) +
  geom_line(color = "blue") +
  geom_point() +
  scale_y_continuous(breaks = seq(0, 26, 4)) +
  theme_minimal() +
  # The count is on the y axis here, so the count title belongs on y (it was
  # previously attached to the date axis).
  labs(y = "每天的发布次数") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  )
########## Distribution over the time of day
# Two views of the number of posts per `hour` value: a horizontal bar chart
# ordered by count, and a line chart over the hour values.

# add_count() attaches the per-hour row count without leaving the data frame
# grouped (the previous group_by() + mutate(n()) never ungrouped it).
df <- data %>%
  add_count(hour, name = "count_hour_occurr")

# geom_bar() counts the rows per hour itself; count_hour_occurr is only used
# to order the bars.
ggplot(
  data = df,
  aes(y = reorder(hour, count_hour_occurr), fill = hour)
) +
  geom_bar() +
  # Fixed ticks every 4 posts (0, 4, ..., 24).
  scale_x_continuous(breaks = seq(0, 26, 4)) +
  theme_minimal() +
  labs(x = "一天中的时间分布") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# One row per hour value for the line chart, so each point is drawn once
# instead of once per post.
hourly_counts <- count(data, hour, name = "count_hour_occurr")

ggplot(
  data = hourly_counts,
  aes(x = hour, y = count_hour_occurr, group = 1)
) +
  geom_line(color = "blue") +
  geom_point() +
  scale_y_continuous(breaks = seq(0, 26, 4)) +
  theme_minimal() +
  # The count is on the y axis here, so the title belongs on y (it was
  # previously attached to the hour axis).
  labs(y = "一天中的时间分布") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  )
########## Context, property and emotion pies
# The three charts below share one pipeline (count a column, convert the
# counts to percentages, draw a labelled pie), so it lives in a helper.

# Draw a pie chart of the share of rows per value of `column`.
#
# posts        Data frame with one row per post.
# column       Unquoted name of the column to count and fill by.
# legend_title Title shown above the fill legend.
# label_size   Text size of the percentage labels.
#
# Returns the ggplot object; calling it at top level prints the plot.
plot_share_pie <- function(posts, column, legend_title, label_size = 3) {
  shares <- posts %>%
    count({{ column }}) %>%
    mutate(
      perc = n / sum(n),
      labels = scales::percent(perc)
    )

  ggplot(shares, aes(x = "", y = n, fill = {{ column }})) +
    geom_col() +
    # Percentage label centred inside each slice.
    geom_label(
      aes(label = labels),
      position = position_stack(vjust = 0.5),
      size = label_size,
      family = "lxgw",
      show.legend = FALSE
    ) +
    # Polar transform of the single stacked bar turns it into a pie.
    coord_polar(theta = "y") +
    guides(fill = guide_legend(title = legend_title)) +
    theme_void() +
    theme(text = element_text(family = "lxgw"))
}

########## Context
plot_share_pie(data, context, "背景信息")

########## Property
plot_share_pie(data, property, "属性")

########## Emotion
plot_share_pie(data, emotional_level, "情绪", label_size = 2.5)
########## Number of images
# Bar chart and pie chart of how many posts carry each number of images.

# add_count() attaches the per-value row count without leaving the data frame
# grouped (the previous group_by() + mutate(n()) never ungrouped it).
df <- data %>%
  add_count(number_of_images, name = "count_image_occurr")

# Bars are ordered from the most to the least frequent image count; the
# negated count makes reorder() sort in descending order.
ggplot(
  data = df,
  aes(
    x = reorder(number_of_images, -count_image_occurr),
    fill = factor(number_of_images)
  )
) +
  geom_bar() +
  # Fixed ticks every 10 posts from 0 to 100.
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  theme_minimal() +
  labs(x = "图片数量") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# Share of posts per image count, for the pie chart.
df <-
  data %>%
  count(number_of_images) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  )

df %>%
  ggplot(aes(x = "", y = n, fill = factor(number_of_images))) +
  geom_col() +
  # Percentage label centred inside each slice.
  geom_label(
    aes(label = labels),
    position = position_stack(vjust = 0.5),
    size = 2.3,
    family = "lxgw",
    show.legend = FALSE
  ) +
  # Polar transform of the single stacked bar turns it into a pie.
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "图片数量")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
########## Number of likes
# Bar chart and pie chart of how many posts received each number of likes.

# add_count() attaches the per-value row count without leaving the data frame
# grouped (the previous group_by() + mutate(n()) never ungrouped it).
df <- data %>%
  add_count(number_of_likes, name = "count_like_occurr")

# Bars are ordered from the most to the least frequent like count; the
# negated count makes reorder() sort in descending order.
ggplot(
  data = df,
  aes(
    x = reorder(number_of_likes, -count_like_occurr),
    fill = factor(number_of_likes)
  )
) +
  geom_bar() +
  # Fixed ticks every 10 posts from 0 to 170.
  scale_y_continuous(breaks = seq(0, 170, 10)) +
  theme_minimal() +
  labs(x = "点赞数量") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# Share of posts per like count, plus the label position for each slice.
df <-
  data %>%
  count(number_of_likes) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  ) %>%
  # Walk the slices from the bottom of the stack upwards so that
  # cumsum(n) - n / 2 is the midpoint of each slice.
  arrange(desc(number_of_likes)) %>%
  mutate(text_y = cumsum(n) - n / 2)

df %>%
  ggplot(aes(x = "", y = n, fill = factor(number_of_likes))) +
  geom_col() +
  geom_label_repel(
    aes(label = labels, y = text_y),
    force = 0.5,
    nudge_x = 1,
    nudge_y = 0.5,
    size = 3,
    show.legend = FALSE,
    family = "lxgw"
  ) +
  # Polar transform of the single stacked bar turns it into a pie.
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "点赞数量")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
########## Number of comments
# Bar chart and pie chart of how many posts received each number of comments.

# add_count() attaches the per-value row count without leaving the data frame
# grouped (the previous group_by() + mutate(n()) never ungrouped it).
df <- data %>%
  add_count(number_of_comments, name = "count_comment_occurr")

# Bars are ordered from the most to the least frequent comment count; the
# negated count makes reorder() sort in descending order.
ggplot(
  data = df,
  aes(
    x = reorder(number_of_comments, -count_comment_occurr),
    fill = factor(number_of_comments)
  )
) +
  geom_bar() +
  # Fixed ticks every 10 posts from 0 to 220.
  scale_y_continuous(breaks = seq(0, 220, 10)) +
  theme_minimal() +
  labs(x = "评论数量") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# Share of posts per comment count, plus the label position for each slice.
df <-
  data %>%
  count(number_of_comments) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  ) %>%
  # Walk the slices from the bottom of the stack upwards so that
  # cumsum(n) - n / 2 is the midpoint of each slice.
  arrange(desc(number_of_comments)) %>%
  mutate(text_y = cumsum(n) - n / 2)

df %>%
  ggplot(aes(x = "", y = n, fill = factor(number_of_comments))) +
  geom_col() +
  geom_label_repel(
    aes(label = labels, y = text_y),
    force = 0.5,
    nudge_x = 1,
    nudge_y = 0.5,
    size = 3,
    show.legend = FALSE,
    family = "lxgw"
  ) +
  # Polar transform of the single stacked bar turns it into a pie.
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "评论数量")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
@TomBener
Copy link
Author

TomBener commented Jan 13, 2023

Before running the code, you must create a file named moments.csv that contains the formatted data.

The contents of moments.csv should look like this:

name, relationship_with_me, time_of_visibility, published_time, day, hour, keywords, medium, number_of_images, context, property, emotional_level, number_of_likes, number_of_comments, last_edited_time
张大锤, 高中同学, 半年, "May 24, 2022 13:20", 2022-05-24, 13:20, 投票, "emoji, 图片, 文字", 2, 是, personal, positive, 13, 0, "June 15, 2022 04:47 PM"
王二云, 本科同学, 三天, "May 22, 2022 9:26", 2022-05-22, 09:26, 毕业答辩, "emoji, 文字, 标签, 链接", 0, 是, exclusive, neutral, 0, 0, "June 13, 2022 11:29 AM"
李小乐, 研究生同学, 一个月, "May 30, 2022 22:31", 2022-05-30, 22:00, 音乐, 音乐, 0, 否, exclusive, neutral, 0, 0, "June 15, 2022 10:28 PM"

Then you can execute the code by running the command below:

$ Rscript wechat-moments-analysis.r

A 17-page PDF file named Rplots.pdf will be generated in the current directory.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment