# jadianes/server.R

## server.R
library(shiny)
library(tm)
library(SnowballC)
library(randomForest)

options(mc.cores=1)

# Train a sentiment classifier restricted to the vocabulary of the new data.
#
# The texts in `new_data_df$Text` go through the same cleaning steps as the
# training corpus and are turned into a document-term data frame. Only terms
# that also appear in the global `train_dtm_df` are kept, and a random forest
# is fitted on the global `train_data_df` using those shared terms.
#
# Arguments:
#   new_data_df  data frame with a `Text` column holding the documents to score
#   sparsity     maximal allowed term sparsity, forwarded to removeSparseTerms()
#
# Returns a list: element 1 (`model`) is the fitted randomForest object,
# element 2 (`new_data`) is the new-data term data frame reduced to the
# shared terms. Stops with an error when no term is shared.
build_model <- function(new_data_df, sparsity) {
    # Create new data corpus
    new_corpus <- Corpus(VectorSource(new_data_df$Text))
    new_corpus <- tm_map(new_corpus, content_transformer(tolower))
    new_corpus <- tm_map(new_corpus, removePunctuation)
    new_corpus <- tm_map(new_corpus, removeWords, stopwords("english"))
    new_corpus <- tm_map(new_corpus, stripWhitespace)
    new_corpus <- tm_map(new_corpus, stemDocument)

    # create document-term matrix
    new_dtm <- DocumentTermMatrix(new_corpus)
    new_dtm <- removeSparseTerms(new_dtm, sparsity)
    new_dtm_df <- as.data.frame(as.matrix(new_dtm))
    colnames(new_dtm_df) <- make.names(colnames(new_dtm_df))

    # intersect corpora and prepare final training data
    common_names <- intersect(colnames(train_dtm_df), colnames(new_dtm_df))
    if (length(common_names) == 0) {
        # Without shared terms the `Sentiment ~ .` formula below would have
        # no predictor columns left, so fail early with a readable message.
        stop(
            "No terms are shared between the training data and the uploaded ",
            "text at the selected sparsity; try a higher sparsity value.",
            call. = FALSE
        )
    }
    keep_new <- names(new_dtm_df) %in% common_names
    new_dtm_df <- new_dtm_df[, keep_new, drop = FALSE]

    keep_train <- names(train_dtm_df) %in% common_names
    model_train_data_df <- cbind(
        train_data_df,
        train_dtm_df[, keep_train, drop = FALSE]
    )
    # the raw text is not a predictor
    model_train_data_df$Text <- NULL

    # train classifier
    model <- randomForest(Sentiment ~ ., data = model_train_data_df, ntree = 50)

    # return value as a list (callers index it positionally)
    list(model = model, new_data = new_dtm_df)
}


# Server logic: scores the uploaded text file and exposes the scored rows as
# a table (`contents`) and as a density plot (`distribution`).
shinyServer(function(input, output) {

    # Uploaded texts together with their predicted probability
    output$contents <- renderTable({
        results()
    })

    output$distribution <- renderPlot({
        scored <- results()
        if (is.null(scored))
            return(NULL)
        # density() cannot select a bandwidth from fewer than two points
        if (nrow(scored) < 2)
            return(NULL)

        # density of the 0/1 indicator "probability above the threshold"
        d <- density(
            as.numeric(scored$Prob > input$threshold)
        )
        plot(
            d,
            xlim = c(0, 1),
            main = paste0("Sentiment Distribution (Prob > ", input$threshold, ")")
        )
        polygon(d, col = "lightgrey", border = "lightgrey")
        abline(v = input$threshold, col = "blue")
    })

    # Scored data frame; NULL until a file has been uploaded. Re-evaluated
    # when the uploaded file or the sparsity input changes.
    results <- reactive({
        inFile <- input$file1

        if (is.null(inFile))
            return(NULL)

        # load input data: one text per line, no header, no quoting
        new_data_df <- read.csv(
            inFile$datapath,
            sep = "\t",
            header = FALSE,
            quote = "",
            stringsAsFactors = FALSE,
            col.names = c("Text")
        )

        model_and_data <- build_model(new_data_df, input$sparsity)

        pred <- predict(
            model_and_data[[1]],
            newdata = model_and_data[[2]],
            type = "prob"
        )

        # second column of the class-probability matrix
        # (presumably the positive class -- confirm against the factor levels)
        new_data_df$Prob <- pred[, 2]

        # return data frame
        new_data_df
    })
})

# Init code: runs once when the web app loads and prepares the global
# training objects (`train_data_df`, `train_dtm_df`) used by build_model().

# Load the labelled training data (tab-separated: sentiment label, text)
train_data_df <- read.csv(
    file = "train_data.tsv",
    sep = "\t",
    quote = "",
    header = FALSE,
    stringsAsFactors = FALSE,
    col.names = c("Sentiment", "Text")
)
# the label must be a factor so that randomForest does classification
train_data_df$Sentiment <- as.factor(train_data_df$Sentiment)

# Create training corpus for later re-use
train_corpus <- Corpus(VectorSource(train_data_df$Text))
train_corpus <- tm_map(train_corpus, content_transformer(tolower))
train_corpus <- tm_map(train_corpus, removePunctuation)
train_corpus <- tm_map(train_corpus, removeWords, stopwords("english"))
train_corpus <- tm_map(train_corpus, stripWhitespace)
train_corpus <- tm_map(train_corpus, stemDocument)

# create document-term matrix, dropping terms sparser than 0.995
train_dtm <- DocumentTermMatrix(train_corpus)
train_dtm <- removeSparseTerms(train_dtm, 0.995)
train_dtm_df <- data.frame(as.matrix(train_dtm))
# make the term names syntactically valid column names
colnames(train_dtm_df) <- make.names(colnames(train_dtm_df))

## ui.R
library(shiny)

# Page layout: title, a sidebar with the upload and slider controls next to
# the distribution plot, and the detailed results table underneath.
shinyUI(fluidPage(

    # Application title
    headerPanel(title = "Text Sentiment Analyser"),

    sidebarLayout(
        # the control panel
        sidebarPanel(
            fileInput(
                inputId = "file1",
                label = "Choose text File",
                accept = c(
                    "text/tsv",
                    "text/tab-separated-values,text/plain",
                    ".tsv"
                )
            ),
            tags$hr(),
            sliderInput(
                inputId = "threshold",
                label = "Positive sentiment threshold",
                min = 0.1,
                max = 0.99,
                value = 0.5
            ),
            tags$hr(),
            sliderInput(
                inputId = "sparsity",
                label = "Max. term sparsity",
                min = 0.1,
                max = 0.99,
                value = 0.95
            )
        ),

        # Show a plot of the generated distribution
        mainPanel(
            plotOutput(outputId = "distribution")
        )
    ),
    tags$hr(),
    fluidRow(
        # the results detail panel
        column(
            width = 12,
            tableOutput(outputId = "contents")
        )
    )
))
	library(shiny)
	library(tm)
	library(SnowballC)
	library(randomForest)

	options(mc.cores=1)

	# Train a sentiment classifier restricted to the vocabulary of the new data.
	#
	# The texts in `new_data_df$Text` go through the same cleaning steps as the
	# training corpus and are turned into a document-term data frame. Only terms
	# that also appear in the global `train_dtm_df` are kept, and a random forest
	# is fitted on the global `train_data_df` using those shared terms.
	#
	# Arguments:
	#   new_data_df  data frame with a `Text` column holding the documents to score
	#   sparsity     maximal allowed term sparsity, forwarded to removeSparseTerms()
	#
	# Returns a list: element 1 (`model`) is the fitted randomForest object,
	# element 2 (`new_data`) is the new-data term data frame reduced to the
	# shared terms. Stops with an error when no term is shared.
	build_model <- function(new_data_df, sparsity) {
	    # Create new data corpus
	    new_corpus <- Corpus(VectorSource(new_data_df$Text))
	    new_corpus <- tm_map(new_corpus, content_transformer(tolower))
	    new_corpus <- tm_map(new_corpus, removePunctuation)
	    new_corpus <- tm_map(new_corpus, removeWords, stopwords("english"))
	    new_corpus <- tm_map(new_corpus, stripWhitespace)
	    new_corpus <- tm_map(new_corpus, stemDocument)

	    # create document-term matrix
	    new_dtm <- DocumentTermMatrix(new_corpus)
	    new_dtm <- removeSparseTerms(new_dtm, sparsity)
	    new_dtm_df <- as.data.frame(as.matrix(new_dtm))
	    colnames(new_dtm_df) <- make.names(colnames(new_dtm_df))

	    # intersect corpora and prepare final training data
	    common_names <- intersect(colnames(train_dtm_df), colnames(new_dtm_df))
	    if (length(common_names) == 0) {
	        # Without shared terms the `Sentiment ~ .` formula below would have
	        # no predictor columns left, so fail early with a readable message.
	        stop(
	            "No terms are shared between the training data and the uploaded ",
	            "text at the selected sparsity; try a higher sparsity value.",
	            call. = FALSE
	        )
	    }
	    keep_new <- names(new_dtm_df) %in% common_names
	    new_dtm_df <- new_dtm_df[, keep_new, drop = FALSE]

	    keep_train <- names(train_dtm_df) %in% common_names
	    model_train_data_df <- cbind(
	        train_data_df,
	        train_dtm_df[, keep_train, drop = FALSE]
	    )
	    # the raw text is not a predictor
	    model_train_data_df$Text <- NULL

	    # train classifier
	    model <- randomForest(Sentiment ~ ., data = model_train_data_df, ntree = 50)

	    # return value as a list (callers index it positionally)
	    list(model = model, new_data = new_dtm_df)
	}


	# Server logic: scores the uploaded text file and exposes the scored rows as
	# a table (`contents`) and as a density plot (`distribution`).
	shinyServer(function(input, output) {

	    # Uploaded texts together with their predicted probability
	    output$contents <- renderTable({
	        results()
	    })

	    output$distribution <- renderPlot({
	        scored <- results()
	        if (is.null(scored))
	            return(NULL)
	        # density() cannot select a bandwidth from fewer than two points
	        if (nrow(scored) < 2)
	            return(NULL)

	        # density of the 0/1 indicator "probability above the threshold"
	        d <- density(
	            as.numeric(scored$Prob > input$threshold)
	        )
	        plot(
	            d,
	            xlim = c(0, 1),
	            main = paste0("Sentiment Distribution (Prob > ", input$threshold, ")")
	        )
	        polygon(d, col = "lightgrey", border = "lightgrey")
	        abline(v = input$threshold, col = "blue")
	    })

	    # Scored data frame; NULL until a file has been uploaded. Re-evaluated
	    # when the uploaded file or the sparsity input changes.
	    results <- reactive({
	        inFile <- input$file1

	        if (is.null(inFile))
	            return(NULL)

	        # load input data: one text per line, no header, no quoting
	        new_data_df <- read.csv(
	            inFile$datapath,
	            sep = "\t",
	            header = FALSE,
	            quote = "",
	            stringsAsFactors = FALSE,
	            col.names = c("Text")
	        )

	        model_and_data <- build_model(new_data_df, input$sparsity)

	        pred <- predict(
	            model_and_data[[1]],
	            newdata = model_and_data[[2]],
	            type = "prob"
	        )

	        # second column of the class-probability matrix
	        # (presumably the positive class -- confirm against the factor levels)
	        new_data_df$Prob <- pred[, 2]

	        # return data frame
	        new_data_df
	    })
	})

	# Init code: runs once when the web app loads and prepares the global
	# training objects (`train_data_df`, `train_dtm_df`) used by build_model().

	# Load the labelled training data (tab-separated: sentiment label, text)
	train_data_df <- read.csv(
	    file = "train_data.tsv",
	    sep = "\t",
	    quote = "",
	    header = FALSE,
	    stringsAsFactors = FALSE,
	    col.names = c("Sentiment", "Text")
	)
	# the label must be a factor so that randomForest does classification
	train_data_df$Sentiment <- as.factor(train_data_df$Sentiment)

	# Create training corpus for later re-use
	train_corpus <- Corpus(VectorSource(train_data_df$Text))
	train_corpus <- tm_map(train_corpus, content_transformer(tolower))
	train_corpus <- tm_map(train_corpus, removePunctuation)
	train_corpus <- tm_map(train_corpus, removeWords, stopwords("english"))
	train_corpus <- tm_map(train_corpus, stripWhitespace)
	train_corpus <- tm_map(train_corpus, stemDocument)

	# create document-term matrix, dropping terms sparser than 0.995
	train_dtm <- DocumentTermMatrix(train_corpus)
	train_dtm <- removeSparseTerms(train_dtm, 0.995)
	train_dtm_df <- data.frame(as.matrix(train_dtm))
	# make the term names syntactically valid column names
	colnames(train_dtm_df) <- make.names(colnames(train_dtm_df))
	library(shiny)

	# Page layout: title, a sidebar with the upload and slider controls next to
	# the distribution plot, and the detailed results table underneath.
	shinyUI(fluidPage(

	    # Application title
	    headerPanel(title = "Text Sentiment Analyser"),

	    sidebarLayout(
	        # the control panel
	        sidebarPanel(
	            fileInput(
	                inputId = "file1",
	                label = "Choose text File",
	                accept = c(
	                    "text/tsv",
	                    "text/tab-separated-values,text/plain",
	                    ".tsv"
	                )
	            ),
	            tags$hr(),
	            sliderInput(
	                inputId = "threshold",
	                label = "Positive sentiment threshold",
	                min = 0.1,
	                max = 0.99,
	                value = 0.5
	            ),
	            tags$hr(),
	            sliderInput(
	                inputId = "sparsity",
	                label = "Max. term sparsity",
	                min = 0.1,
	                max = 0.99,
	                value = 0.95
	            )
	        ),

	        # Show a plot of the generated distribution
	        mainPanel(
	            plotOutput(outputId = "distribution")
	        )
	    ),
	    tags$hr(),
	    fluidRow(
	        # the results detail panel
	        column(
	            width = 12,
	            tableOutput(outputId = "contents")
	        )
	    )
	))