Skip to content

Instantly share code, notes, and snippets.

@not-for-me
Last active August 29, 2015 14:03
Show Gist options
  • Save not-for-me/f0e269015e5681ec56ab to your computer and use it in GitHub Desktop.
Save not-for-me/f0e269015e5681ec56ab to your computer and use it in GitHub Desktop.
topicmodel_with_r
# Library Load
library(tm)
# Directories holding the plain-text input files for the two collections
otFilePath <- "~/Documents/mining/project/old"
ntFilePath <- "~/Documents/mining/project/new"

# Build one tm corpus per directory; files are read as plain English text
oldTextCorpus <- Corpus(
  DirSource(otFilePath),
  readerControl = list(reader = readPlain, language = "en")
)
newTextCorpus <- Corpus(
  DirSource(ntFilePath),
  readerControl = list(reader = readPlain, language = "en")
)

# Print an overview of each imported corpus
summary(oldTextCorpus)
summary(newTextCorpus)
# Text Preprocessing
# SnowballC is loaded ahead of the stemDocument step below.
library(SnowballC)

# Extra stop words removed in addition to tm's built-in English list.
myStopWords <- c("also", "among", "like", "may", "must", "shall", "take", "went", "will")

# Run the cleaning pipeline on a tm corpus and return the cleaned corpus.
#
# Steps, in order: lower-case, drop English stop words, drop the extra stop
# words, drop numbers, drop punctuation, collapse whitespace, stem.
# Stop words are removed before punctuation is stripped, so entries of the
# stop word lists are matched against the text while it still has its
# punctuation.
#
# corpus         : a tm corpus (as returned by Corpus()).
# extraStopWords : character vector of additional words to remove;
#                  defaults to myStopWords.
# Returns the transformed corpus.
preprocessCorpus <- function(corpus, extraStopWords = myStopWords) {
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removeWords, stopwords("english"))
  corpus <- tm_map(corpus, removeWords, extraStopWords)
  corpus <- tm_map(corpus, removeNumbers)
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, stripWhitespace)
  tm_map(corpus, stemDocument)
}

# Both corpora go through the exact same pipeline so their vocabularies
# stay comparable when they are later combined.
oldTextCorpus <- preprocessCorpus(oldTextCorpus)
newTextCorpus <- preprocessCorpus(newTextCorpus)
# Combined corpus: the old-text documents followed by the new-text documents
bibleCorpus <- c(oldTextCorpus, newTextCorpus)

# Document-term matrices for each corpus and for the combined corpus
old_dtm <- DocumentTermMatrix(oldTextCorpus)
new_dtm <- DocumentTermMatrix(newTextCorpus)
bible_dtm <- DocumentTermMatrix(bibleCorpus)

# Report the dimensions (documents x terms) of each matrix
dim(old_dtm)
dim(new_dtm)
dim(bible_dtm)
library(topicmodels)

# Seed R's own RNG as well, so any later R-level random draws are repeatable.
set.seed(1102)

# Fit a 3-topic LDA model on the combined document-term matrix.
# The default (VEM) estimation draws its random numbers in external code that
# is seeded from control$seed, not from set.seed(); the seed is therefore
# passed explicitly so that repeated runs give the same topics.
# NOTE: the result is stored under the name `LDA`, which shadows the function
# of the same name as a variable; calls such as LDA(...) still resolve to the
# function because R skips non-function bindings when looking up a call.
LDA <- LDA(bible_dtm, k = 3, control = list(alpha = 0.1, seed = 1102L))

# Up to 100 most probable terms per topic (one column per topic).
topics <- get_terms(LDA, 100)

# Show the term list of each topic. All available rows are selected rather
# than 1:100 so this does not fail with "subscript out of bounds" when fewer
# than 100 terms are returned.
topics[, 1]
topics[, 2]
topics[, 3]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment