Skip to content

Instantly share code, notes, and snippets.

@juba
Last active July 24, 2020 09:45
Show Gist options
  • Save juba/ad7e0bc46d4a7375c9d93022970e7b95 to your computer and use it in GitHub Desktop.
Save juba/ad7e0bc46d4a7375c9d93022970e7b95 to your computer and use it in GitHub Desktop.
library(quanteda)
library(rainette)
library(scatterD3)
library(ggplot2)
## Segment-level classification ----------------------------
# Keep the inaugural addresses given after 1970, then cut every speech into
# segments with split_segments() using its default settings.
corpus <- corpus_subset(data_corpus_inaugural, Year > 1970)
corpus <- split_segments(corpus)
# Tokenise explicitly before building the document-feature matrix: handing a
# corpus plus `remove` / `remove_punct` straight to dfm() is deprecated since
# quanteda 3.0. Lower-casing happens before stopword removal, mirroring what
# the one-step dfm() call did.
tok <- tokens(corpus, remove_punct = TRUE)
tok <- tokens_tolower(tok)
tok <- tokens_remove(tok, stopwords("en"))
dtm <- dfm(tok)
# Discard terms whose total frequency is below 5.
dtm <- dfm_trim(dtm, min_termfreq = 5)
# Cluster the segments with rainette, requesting 5 groups.
# NOTE(review): newer rainette releases appear to rename `min_uc_size` to
# `min_segment_size` -- confirm against the installed version.
res <- rainette(dtm, k = 5, min_uc_size = 15, min_split_members = 20)
## With FactoMineR / factoextra ----------------------------
library(FactoMineR)
library(factoextra)
# Package name is "scatterD3"; the misspelled "scaterD3" aborts the script.
library(scatterD3)
## Compute the correspondence analysis (CA)
# Convert the quanteda dfm to a base matrix before handing it to CA();
# graph = FALSE keeps CA() from drawing its own plot.
df <- convert(dtm, to = "matrix")
afc <- CA(df, graph = FALSE)
## Segment space: static plot
# Rows of the CA are the text segments; colour them by rainette cluster.
fviz_ca_row(afc, col.row = factor(res$group), addEllipses = TRUE, labelsize = 3)
## Segment space: interactive plot
# The row coordinates live in `afc$row$coord`. The previous `afc$row$coords`
# matched nothing (partial matching only completes prefixes) and returned
# NULL, so the subsetting below failed.
tmp <- afc$row$coord
scatterD3(
  x = tmp[, 1],
  y = tmp[, 2],
  # Pass the cluster as a factor, as the static plot does, so it is always
  # treated as a categorical colour variable.
  col_var = factor(res$group),
  ellipses = TRUE,
  lab = rownames(tmp)
)
## Term space: static plot
# Columns of the CA are the terms of the document-feature matrix.
fviz_ca_col(afc, labelsize = 3)
## Term space: interactive plot
# Pull the column coordinates and plot the first two dimensions, labelling
# each point with its row name in the coordinate matrix.
term_coords <- afc[["col"]][["coord"]]
scatterD3(
  lab = rownames(term_coords),
  x = term_coords[, 1],
  y = term_coords[, 2]
)
## With quanteda.textmodels / ggplot2 ----------------------
library(quanteda.textmodels)
library(ggplot2)
library(scatterD3)
## Correspondence analysis computed by quanteda.textmodels
# NOTE(review): the original comment described the input as a boolean
# term-document matrix, but `dtm` is passed unchanged here -- confirm
# whether it should be binarised first.
ca <- textmodel_ca(dtm)
## Segment-space plots
# Assemble one data frame holding the row coordinates, the rainette cluster
# of each segment (as a factor) and the segment name.
row_coords <- data.frame(ca$rowcoord)
rows <- data.frame(
  row_coords,
  group = factor(res$group),
  name = rownames(row_coords),
  check.names = FALSE
)
# Static plot: first two dimensions, dashed axes through the origin,
# one colour and one ellipse per cluster.
ggplot(data = rows, mapping = aes(x = Dim1, y = Dim2, color = group)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_point() +
  stat_ellipse() +
  scale_color_brewer(palette = "Set1")
# Interactive plot of the same data
scatterD3(
  data = rows,
  x = Dim1,
  y = Dim2,
  lab = name,
  col_var = group,
  ellipses = TRUE
)
## Term-space plots
# Turn the column coordinates into a data frame and keep the term (row name)
# as a regular column so it can be used as a label.
cols <- data.frame(ca$colcoord)
cols <- cbind(cols, name = rownames(cols))
# Static plot: terms drawn as red text on the first two dimensions, with
# dashed axes through the origin.
ggplot(cols, aes(x = Dim1, y = Dim2, label = name)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_text(size = 3, color = "red")
# Interactive plot. The stray trailing comma after `lab = name` (which passed
# an empty extra argument) is removed, and the call is written like the other
# scatterD3() calls in this script.
scatterD3(
  cols,
  x = Dim1,
  y = Dim2,
  lab = name
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment