Skip to content

Instantly share code, notes, and snippets.

@jadianes
Created August 13, 2015 11:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jadianes/7a6d0f132f1b5bcd9647 to your computer and use it in GitHub Desktop.
Save jadianes/7a6d0f132f1b5bcd9647 to your computer and use it in GitHub Desktop.
Sentiment Analyser Shiny app
library(shiny)
library(tm)
library(SnowballC)
library(randomForest)
# Set the global "mc.cores" option to 1 for this R session.
# NOTE(review): presumably this keeps the text-mining steps below from
# forking parallel workers inside the Shiny process -- confirm against the
# installed tm/parallel versions.
options(mc.cores=1)
#' Train a sentiment classifier restricted to the vocabulary of new data
#'
#' Builds a document-term matrix from `new_data_df$Text` (lower-cased,
#' punctuation and English stop words removed, whitespace collapsed, stemmed),
#' keeps only the terms that also appear as columns of the training matrix,
#' and fits a 50-tree random forest on the training rows using those shared
#' terms as predictors.
#'
#' This function reads two objects that are NOT passed as arguments and must
#' exist in an enclosing environment when it is called:
#' `train_data_df` (columns `Sentiment` and `Text`) and `train_dtm_df`
#' (the training document-term matrix as a data frame).
#'
#' @param new_data_df Data frame with a `Text` column holding the documents
#'   to be scored.
#' @param sparsity Sparsity threshold forwarded to `removeSparseTerms()` for
#'   the new-data document-term matrix.
#' @return A list of two elements, accessible by position or by name:
#'   `model` (the fitted `randomForest` object) and `new_dtm_df` (the new-data
#'   document-term data frame restricted to the shared terms).
build_model <- function(new_data_df, sparsity) {
  # Create new data corpus and normalise it the same way as the training one
  new_corpus <- Corpus(VectorSource(new_data_df$Text))
  new_corpus <- tm_map(new_corpus, content_transformer(tolower))
  new_corpus <- tm_map(new_corpus, removePunctuation)
  new_corpus <- tm_map(new_corpus, removeWords, stopwords("english"))
  new_corpus <- tm_map(new_corpus, stripWhitespace)
  new_corpus <- tm_map(new_corpus, stemDocument)

  # Create document-term matrix and drop terms that are too sparse
  new_dtm <- DocumentTermMatrix(new_corpus)
  new_dtm <- removeSparseTerms(new_dtm, sparsity)
  new_dtm_df <- as.data.frame(as.matrix(new_dtm))
  colnames(new_dtm_df) <- make.names(colnames(new_dtm_df))

  # Intersect vocabularies: the model may only use terms present in both sets
  common_names <- intersect(colnames(train_dtm_df), colnames(new_dtm_df))
  if (length(common_names) == 0) {
    # Without shared terms there are no predictors to train on; fail with a
    # clear message instead of an opaque error from the model fit.
    stop(
      "No terms in common between the training data and the new data; ",
      "try a higher sparsity value or a different input file.",
      call. = FALSE
    )
  }

  # Plain bracket indexing (rather than subset()) so that a term column can
  # never shadow the variables used in the selection expression.
  new_dtm_df <- new_dtm_df[, names(new_dtm_df) %in% common_names, drop = FALSE]
  model_train_data_df <- cbind(
    train_data_df,
    train_dtm_df[, names(train_dtm_df) %in% common_names, drop = FALSE]
  )
  # The raw text is not a predictor; only Sentiment and term counts remain
  model_train_data_df$Text <- NULL

  # Train classifier
  model <- randomForest(Sentiment ~ ., data = model_train_data_df, ntree = 50)

  # Named list; positional access ([[1]], [[2]]) keeps working for callers
  list(model = model, new_dtm_df = new_dtm_df)
}
# Server logic: scores an uploaded text file with a freshly trained model and
# exposes the scored rows as a table and as a density plot.
shinyServer(function(input, output) {
  # Reactive data frame with one row per uploaded line: the original `Text`
  # plus a `Prob` column. NULL until a file has been uploaded.
  results <- reactive({
    inFile <- input$file1
    if (is.null(inFile)) {
      return(NULL)
    }
    # Load input data: a single unnamed text column, no quoting
    new_data_df <- read.csv(
      inFile$datapath,
      sep = "\t",
      header = FALSE,
      quote = "",
      stringsAsFactors = FALSE,
      col.names = c("Text")
    )
    model_and_data <- build_model(new_data_df, input$sparsity)
    pred <- predict(
      model_and_data[[1]],
      newdata = model_and_data[[2]],
      type = "prob"
    )
    # Second column of the class-probability matrix.
    # NOTE(review): presumably the positive class; this depends on the level
    # order of the training `Sentiment` factor -- confirm.
    new_data_df$Prob <- pred[, 2]
    # Return data frame
    new_data_df
  })

  output$contents <- renderTable({
    results()
  })

  output$distribution <- renderPlot({
    scored <- results()
    # density() cannot select a bandwidth from fewer than two observations,
    # so render nothing for missing or single-row input instead of erroring.
    if (is.null(scored) || nrow(scored) < 2) {
      return(NULL)
    }
    # NOTE(review): the density is estimated over the 0/1 indicator
    # (Prob > threshold), not over the raw probabilities -- confirm intended.
    d <- density(
      as.numeric(scored$Prob > input$threshold)
    )
    plot(
      d,
      xlim = c(0, 1),
      main = paste0("Sentiment Distribution (Prob > ", input$threshold, ")")
    )
    polygon(d, col = "lightgrey", border = "lightgrey")
    abline(v = input$threshold, col = "blue")
  })
})
# This is the init code, that will be run when the web app loads.
# It loads the training data and precomputes the training document-term
# matrix so that later model builds can reuse it.

# Load training data: tab-separated, no header, columns Sentiment and Text
train_data_df <- read.csv(
  file = "train_data.tsv",
  sep = "\t",
  quote = "",
  header = FALSE,
  stringsAsFactors = FALSE,
  col.names = c("Sentiment", "Text")
)
# The label is treated as a categorical variable
train_data_df$Sentiment <- as.factor(train_data_df$Sentiment)

# Create training corpus for later re-use: lower-case, strip punctuation,
# drop English stop words, collapse whitespace, stem
train_corpus <- Corpus(VectorSource(train_data_df$Text))
train_corpus <- tm_map(train_corpus, content_transformer(tolower))
train_corpus <- tm_map(train_corpus, removePunctuation)
train_corpus <- tm_map(train_corpus, removeWords, stopwords("english"))
train_corpus <- tm_map(train_corpus, stripWhitespace)
train_corpus <- tm_map(train_corpus, stemDocument)

# Create document-term matrix, dropping terms above the 0.995 sparsity bound
train_dtm <- DocumentTermMatrix(train_corpus)
train_dtm <- removeSparseTerms(train_dtm, 0.995)
train_dtm_df <- data.frame(as.matrix(train_dtm))
# Make term names syntactically valid column names
colnames(train_dtm_df) <- make.names(colnames(train_dtm_df))
library(shiny)
# --- Control panel widgets ---------------------------------------------------

# Upload control for the text file to be scored
upload_input <- fileInput(
  "file1",
  "Choose text File",
  accept = c(
    "text/tsv",
    "text/tab-separated-values,text/plain",
    ".tsv"
  )
)

# Slider for the probability cut-off used in the plot
threshold_slider <- sliderInput(
  "threshold",
  "Positive sentiment threshold",
  min = 0.1,
  max = 0.99,
  value = 0.5
)

# Slider for the sparsity bound applied to the uploaded data's terms
sparsity_slider <- sliderInput(
  "sparsity",
  "Max. term sparsity",
  min = 0.1,
  max = 0.99,
  value = 0.95
)

# --- Page sections -----------------------------------------------------------

# Sidebar with the controls on the left, distribution plot on the right
controls_and_plot <- sidebarLayout(
  sidebarPanel(
    upload_input,
    tags$hr(),
    threshold_slider,
    tags$hr(),
    sparsity_slider
  ),
  mainPanel(
    plotOutput("distribution")
  )
)

# Full-width row holding the per-document results table
results_detail <- fluidRow(
  column(
    12,
    tableOutput("contents")
  )
)

# --- Page assembly -----------------------------------------------------------

shinyUI(fluidPage(
  headerPanel("Text Sentiment Analyser"),
  controls_and_plot,
  tags$hr(),
  results_detail
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment