# tomastitera/udpipe_fbposts.R

## udpipe_fbposts.R
# Noun-frequency analysis of Facebook page posts with UDPipe.
#
# Pipeline: load the Czech UDPipe model, read the exported page posts,
# annotate the post messages, keep the tokens tagged as NOUN, count their
# lemmas and plot the 30 most frequent ones.
#
# The pipeline runs exactly once. An earlier revision carried a second,
# verbatim (tab-indented) copy of every statement, which reloaded the model
# and re-ran the costly annotation step only to produce the same objects.
library(udpipe)
library(lattice)

# Inputs are addressed through explicit paths instead of setwd(), so running
# the script does not change the working directory of the calling session.
data_dir <- path.expand("~/R/snmupdpipe")
model_file <- file.path(data_dir, "czech-ud-2.0-170801.udpipe")
posts_file <- file.path(
  data_dir,
  "page_179497582061065_2018_04_25_14_18_33.tab"
)

# Fail early, naming the missing file(s), rather than part-way through.
missing_files <- c(model_file, posts_file)
missing_files <- missing_files[!file.exists(missing_files)]
if (length(missing_files) > 0) {
  stop(
    "Input file(s) not found: ", paste(missing_files, collapse = ", "),
    call. = FALSE
  )
}

udmodel_czech <- udpipe_load_model(file = model_file)

# Tab-separated export; stringsAsFactors = FALSE keeps the text columns as
# character on R < 4.0 as well (it is already the default from R 4.0 on).
page <- read.csv(
  posts_file,
  sep = "\t",
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)
if (!"post_message" %in% names(page)) {
  stop("Column 'post_message' not found in ", posts_file, call. = FALSE)
}
korpus.raw <- as.vector(page$post_message)

# Annotate every post and flatten the result to one row per token.
x <- udpipe_annotate(udmodel_czech, x = korpus.raw)
x <- as.data.frame(x)
# View() needs an interactive session; skip it under Rscript / R CMD BATCH.
if (interactive()) {
  View(x)
}

# Keep only the tokens whose universal part-of-speech tag is NOUN.
stats <- subset(x, upos %in% "NOUN")
if (interactive()) {
  View(stats)
}

# Frequency table of noun lemmas with columns `key` and `freq`; txt_freq()
# returns it ordered from most to least frequent (per the udpipe docs).
stats <- txt_freq(x = stats$lemma)

# Reverse the factor levels so the most frequent lemma is drawn at the top
# of the horizontal bar chart.
stats$key <- factor(stats$key, levels = rev(stats$key))

# Lattice plots are drawn only when the trellis object is printed; the
# explicit print() makes the chart appear under source() too, where
# top-level values are not auto-printed.
print(
  barchart(
    key ~ freq,
    data = head(stats, 30),
    col = "cadetblue",
    main = "Most occurring nouns",
    xlab = "Freq"
  )
)