# not-for-me/topicmodel.r

## topicmodel.r
# Library Load
# Every package is attached up front so a missing one stops the script
# before any corpus is read.
library(tm)
library(SnowballC)    # loaded for stemDocument()
library(topicmodels)  # LDA() and get_terms()

# Set file Paths
otFilePath <- "~/Documents/mining/project/old"
ntFilePath <- "~/Documents/mining/project/new"

# Read every file in `path` as a plain-text, English-language document.
#   path: directory handed to DirSource().
# Returns the corpus built by Corpus().
read_plain_corpus <- function(path) {
  Corpus(
    DirSource(path),
    readerControl = list(reader = readPlain, language = "en")
  )
}

# Apply the cleaning pipeline shared by both corpora. The step order is
# significant and is kept exactly as before: lower-case, drop English stop
# words, drop the extra stop words, drop numbers, drop punctuation, collapse
# whitespace, stem.
#   corpus:           a tm corpus.
#   extra_stop_words: character vector of additional words to remove; the
#                     step is skipped when it is empty.
# Returns the transformed corpus.
preprocess_corpus <- function(corpus, extra_stop_words = character(0)) {
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removeWords, stopwords("english"))
  if (length(extra_stop_words) > 0) {
    corpus <- tm_map(corpus, removeWords, extra_stop_words)
  }
  corpus <- tm_map(corpus, removeNumbers)
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, stripWhitespace)
  tm_map(corpus, stemDocument)
}

# Import txt to TextCorpus
oldTextCorpus <- read_plain_corpus(otFilePath)
newTextCorpus <- read_plain_corpus(ntFilePath)
summary(oldTextCorpus)
summary(newTextCorpus)


# Text Preprocessing
# Extra stop words removed in addition to stopwords("english"); defined once
# and applied to both corpora.
myStopWords <- c(
  "also", "among", "like", "may", "must", "shall", "take", "went", "will"
)
oldTextCorpus <- preprocess_corpus(oldTextCorpus, myStopWords)
newTextCorpus <- preprocess_corpus(newTextCorpus, myStopWords)

# Combined corpus: old documents followed by new documents.
bibleCorpus <- c(oldTextCorpus, newTextCorpus)

old_dtm <- DocumentTermMatrix(oldTextCorpus)
dim(old_dtm)

new_dtm <- DocumentTermMatrix(newTextCorpus)
dim(new_dtm)

bible_dtm <- DocumentTermMatrix(bibleCorpus)
dim(bible_dtm)


# Topic model
# set.seed() alone does not pin the fit: per the topicmodels documentation,
# the default (VEM) estimation takes its seed from control$seed, which
# otherwise defaults to a time-derived value. The same seed is therefore
# passed through `control` so repeated runs give the same topics.
set.seed(1102)
# Called as topicmodels::LDA() because the result is stored under the name
# `LDA` (kept so code that uses that object keeps working).
LDA <- topicmodels::LDA(
  bible_dtm,
  k = 3,
  control = list(alpha = 0.1, seed = 1102L)
)

# One column of top-ranked terms per topic, up to 100 terms each.
topics <- get_terms(LDA, 100)
# Whole-column indexing prints the same terms as rows 1:100 without
# depending on the exact row count.
topics[, 1]
topics[, 2]
topics[, 3]
	# NOTE(review): this tab-indented section repeats the pipeline at the top of
	# the file step for step. It re-reads both directories, refits the model and
	# overwrites the objects created earlier - confirm whether the repeat is
	# intentional or a paste accident.

	# Library Load
	# Every package is attached up front so a missing one stops the section
	# before any corpus is read.
	library(tm)
	library(SnowballC)    # loaded for stemDocument()
	library(topicmodels)  # LDA() and get_terms()

	# Set file Paths
	otFilePath <- "~/Documents/mining/project/old"
	ntFilePath <- "~/Documents/mining/project/new"

	# Read every file in `path` as a plain-text, English-language document.
	#   path: directory handed to DirSource().
	# Returns the corpus built by Corpus().
	read_plain_corpus <- function(path) {
	  Corpus(
	    DirSource(path),
	    readerControl = list(reader = readPlain, language = "en")
	  )
	}

	# Apply the cleaning pipeline shared by both corpora. The step order is
	# significant and is kept exactly as before: lower-case, drop English stop
	# words, drop the extra stop words, drop numbers, drop punctuation, collapse
	# whitespace, stem.
	#   corpus:           a tm corpus.
	#   extra_stop_words: character vector of additional words to remove; the
	#                     step is skipped when it is empty.
	# Returns the transformed corpus.
	preprocess_corpus <- function(corpus, extra_stop_words = character(0)) {
	  corpus <- tm_map(corpus, content_transformer(tolower))
	  corpus <- tm_map(corpus, removeWords, stopwords("english"))
	  if (length(extra_stop_words) > 0) {
	    corpus <- tm_map(corpus, removeWords, extra_stop_words)
	  }
	  corpus <- tm_map(corpus, removeNumbers)
	  corpus <- tm_map(corpus, removePunctuation)
	  corpus <- tm_map(corpus, stripWhitespace)
	  tm_map(corpus, stemDocument)
	}

	# Import txt to TextCorpus
	oldTextCorpus <- read_plain_corpus(otFilePath)
	newTextCorpus <- read_plain_corpus(ntFilePath)
	summary(oldTextCorpus)
	summary(newTextCorpus)


	# Text Preprocessing
	# Extra stop words removed in addition to stopwords("english"); defined once
	# and applied to both corpora.
	myStopWords <- c(
	  "also", "among", "like", "may", "must", "shall", "take", "went", "will"
	)
	oldTextCorpus <- preprocess_corpus(oldTextCorpus, myStopWords)
	newTextCorpus <- preprocess_corpus(newTextCorpus, myStopWords)

	# Combined corpus: old documents followed by new documents.
	bibleCorpus <- c(oldTextCorpus, newTextCorpus)

	old_dtm <- DocumentTermMatrix(oldTextCorpus)
	dim(old_dtm)

	new_dtm <- DocumentTermMatrix(newTextCorpus)
	dim(new_dtm)

	bible_dtm <- DocumentTermMatrix(bibleCorpus)
	dim(bible_dtm)


	# Topic model
	# set.seed() alone does not pin the fit: per the topicmodels documentation,
	# the default (VEM) estimation takes its seed from control$seed, which
	# otherwise defaults to a time-derived value. The same seed is therefore
	# passed through `control` so repeated runs give the same topics.
	set.seed(1102)
	# Called as topicmodels::LDA() because the result is stored under the name
	# `LDA` (kept so code that uses that object keeps working).
	LDA <- topicmodels::LDA(
	  bible_dtm,
	  k = 3,
	  control = list(alpha = 0.1, seed = 1102L)
	)

	# One column of top-ranked terms per topic, up to 100 terms each.
	topics <- get_terms(LDA, 100)
	# Whole-column indexing prints the same terms as rows 1:100 without
	# depending on the exact row count.
	topics[, 1]
	topics[, 2]
	topics[, 3]