PolMine/gist:a3dd727f3bfec24f0918d7ebe3de8033

## gistfile1.txt
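# This script trains a word2vec model on the GERMAPARL corpus and is easy to
# adapt to other corpora. It relies on the package
# [wordVectors](https://github.com/bmschmidt/wordVectors).
# NOTE(review): corpus(), split(s_attribute = ...) and get_token_stream() are
# not loaded by this script; they presumably come from polmineR, which has to
# be attached beforehand - confirm.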


library(wordVectors)

# Plain-text training file: .fn() appends one line per speech to it, and it is
# the first argument handed to train_word2vec().
file_out <- "~/Lab/tmp/germaparl.txt"
# Second argument handed to train_word2vec(), i.e. where the trained vectors
# are meant to end up.
vectors_bin <- "~/Lab/tmp/germaparl.bin"

# Collapse the character vector `x` into one space-separated string and append
# it as a single line to `file_out` (looked up in the enclosing environment).
.fn <- function(x) {
  readr::write_lines(
    stringr::str_c(x, collapse = " "),
    file_out,
    append = TRUE
  )
}

# Make sure the target directory exists and start from an empty training file:
# .fn() only ever appends, so a file left over from an earlier run would put
# every speech into the training data a second time.
dir.create(dirname(file_out), recursive = TRUE, showWarnings = FALSE)
if (file.exists(file_out)) {
  invisible(file.remove(file_out))
}

# Split the corpus by speech, extract the word tokens of each speech and let
# .fn() write them as one line per speech. The list returned by lapply() is
# only a by-product of the writes; invisible() keeps it from being
# auto-printed at top level.
invisible(
  corpus("GERMAPARL") %>%
    split(s_attribute = "speech_id") %>%
    get_token_stream(p_attribute = "word") %>%
    lapply(.fn)
)

# Train the word2vec model on the exported text file; vectors_bin is passed as
# the output location.
# NOTE(review): negative_samples = 0 - confirm against the wordVectors
# documentation that this is the intended training setting.
train_word2vec(
  file_out,
  vectors_bin,
  vectors = 200,
  threads = 7,
  window = 12,
  iter = 5,
  negative_samples = 0
)
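	# This script trains a word2vec model on the GERMAPARL corpus and is easy to
	# adapt to other corpora. It relies on the package
	# [wordVectors](https://github.com/bmschmidt/wordVectors).
	# NOTE(review): this tab-indented section repeats the script above verbatim,
	# so running the whole file executes every step twice - confirm whether the
	# copy is intended.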


	library(wordVectors)

	# Plain-text training file: .fn() appends one line per speech to it, and it is
	# the first argument handed to train_word2vec().
	file_out <- "~/Lab/tmp/germaparl.txt"
	# Second argument handed to train_word2vec(), i.e. where the trained vectors
	# are meant to end up.
	vectors_bin <- "~/Lab/tmp/germaparl.bin"

	# Collapse the character vector `x` into one space-separated string and append
	# it as a single line to `file_out` (looked up in the enclosing environment).
	.fn <- function(x) {
	  readr::write_lines(
	    stringr::str_c(x, collapse = " "),
	    file_out,
	    append = TRUE
	  )
	}

	# Make sure the target directory exists and start from an empty training file:
	# .fn() only ever appends, so a file left over from an earlier run would put
	# every speech into the training data a second time.
	dir.create(dirname(file_out), recursive = TRUE, showWarnings = FALSE)
	if (file.exists(file_out)) {
	  invisible(file.remove(file_out))
	}
	# Split the corpus by speech, extract the word tokens of each speech and let
	# .fn() write them as one line per speech. The list returned by lapply() is
	# only a by-product of the writes; invisible() keeps it from being
	# auto-printed at top level.
	invisible(
	  corpus("GERMAPARL") %>%
	    split(s_attribute = "speech_id") %>%
	    get_token_stream(p_attribute = "word") %>%
	    lapply(.fn)
	)

	# Train the word2vec model on the exported text file; vectors_bin is passed as
	# the output location.
	# NOTE(review): negative_samples = 0 - confirm against the wordVectors
	# documentation that this is the intended training setting.
	train_word2vec(
	  file_out,
	  vectors_bin,
	  vectors = 200,
	  threads = 7,
	  window = 12,
	  iter = 5,
	  negative_samples = 0
	)