Last active
March 4, 2016 06:19
-
-
Save Ray901/87178f1b465232f3f3f0 to your computer and use it in GitHub Desktop.
R shiny wordCloud
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny)
library(rJava)

# Install each text-mining dependency on first use, then attach it.
# requireNamespace() reports whether the package can be loaded without
# attaching it, so it is used purely as the "is it installed?" probe;
# library() then attaches the package and errors if installation failed.

# Rwordseg: Chinese word segmentation (installed from R-Forge).
if (!requireNamespace("Rwordseg", quietly = TRUE)) {
  install.packages("Rwordseg", repos = "http://R-Forge.R-project.org")
}
library(Rwordseg)

# tm: corpus handling and document-term matrices.
if (!requireNamespace("tm", quietly = TRUE)) {
  install.packages("tm")
}
library(tm)

# tmcn: Chinese helpers for tm (installed from R-Forge).
if (!requireNamespace("tmcn", quietly = TRUE)) {
  install.packages("tmcn", repos = "http://R-Forge.R-project.org")
}
library(tmcn)

# wordcloud: draws the word cloud itself.
if (!requireNamespace("wordcloud", quietly = TRUE)) {
  install.packages("wordcloud")
}
library(wordcloud)
# Extra vocabulary handed to the segmenter via insertWords() below.
# NOTE(review): these look like school-grade terms that should be kept as
# single tokens -- confirm against the data being analysed.
setNewWords <- c(
  "国中生", "高中生", "英听", "题目", "国文",
  "国一", "国一上", "国一下",
  "国二", "国二上", "国二下",
  "国三", "国三上", "国三下",
  "国中", "国小"
)

# Words stripped from the corpus in addition to the stock stop-word list
# (combined with stopwordsCN() inside the server function).
setRemoveWords <- c(
  "無", "很", "好", "希望", "更", "太", "上", "再", "沒", "沒有", "不",
  "你們", "為什麼", "還有", "可以", "可不可以", "請問", "會",
  "會不會", "怎麼", "不能", "嗎", "謝謝", "最", "都", "錯了"
)

# Register the custom vocabulary once, when the script is loaded.
insertWords(setNewWords)
################################################################ | |
# Shiny server logic for the word-cloud app.
#
# Inputs read:
#   input$file1       uploaded CSV file
#   input$fieldSelect name of the CSV column holding the text
#   input$minfreq     minimum frequency passed to wordcloud()
#   input$run         action button that triggers a new analysis
# Outputs set:
#   output$plot       the rendered word cloud
shinyServer(function(input, output, session) {

  # Read the CSV behind a fileInput value. Returns NULL when no file has
  # been uploaded yet.
  readUpload <- function(fileInfo) {
    if (is.null(fileInfo)) {
      return(NULL)
    }
    read.csv(fileInfo$datapath)
  }

  # Term-frequency table for the selected column: a data frame with columns
  # `word` and `freq`, sorted by decreasing frequency, or NULL when there is
  # nothing to analyse. Only input$run is a reactive dependency; the file and
  # field inputs are read through isolate(), so the analysis is recomputed
  # when the Run button is pressed rather than on every input change.
  getwords <- reactive({
    input$run
    dat <- readUpload(isolate(input$file1))
    if (is.null(dat)) {
      return(NULL)
    }

    setField <- isolate(input$fieldSelect)
    # The select box starts out as "" and may not match the uploaded file,
    # so make sure it names a real column before indexing into the data.
    if (!isTRUE(setField %in% names(dat))) {
      return(NULL)
    }

    allStrVec <- as.character(dat[[setField]])
    if (length(allStrVec) < 1) {
      stop("資料太少了")
    }

    # Segment every document into words, then build a corpus from the result.
    seqmentVec <- segmentCN(allStrVec, nature = TRUE)
    d.corpus <- Corpus(VectorSource(seqmentVec))

    # Drop the stock Chinese stop words plus the app-specific removal list.
    myStopWords <- c(stopwordsCN(), setRemoveWords)
    d.corpus <- tm_map(d.corpus, removeWords, myStopWords)

    dtm1 <- DocumentTermMatrix(
      d.corpus,
      control = list(
        wordLengths = c(1, Inf),             # to allow long words
        bounds = list(global = c(5, Inf)),   # each term appears in at least 5 docs
        removeNumbers = TRUE,
        # removePunctuation = list(preserve_intra_word_dashes = FALSE),
        weighting = weightTf,
        encoding = "UTF-8"
      )
    )

    # Total count per term across all documents, most frequent first.
    m <- as.matrix(dtm1)
    v <- sort(colSums(m), decreasing = TRUE)
    data.frame(word = names(v), freq = v, stringsAsFactors = FALSE)
  })

  # Whenever a new file is uploaded, offer its column names in the selector.
  observe({
    newDat <- readUpload(input$file1)
    if (!is.null(newDat)) {
      updateSelectInput(session, "fieldSelect",
        choices = names(newDat)
      )
    }
  })

  output$plot <- renderPlot({
    d <- getwords()
    # Draw only when the analysis produced at least one term; wordcloud()
    # has nothing to lay out for an empty table.
    if (!is.null(d) && nrow(d) > 0) {
      # isolate(): moving the slider alone does not redraw; the new threshold
      # is picked up the next time getwords() is recomputed.
      setMinFreq <- isolate(input$minfreq)
      wordcloud(as.character(d$word), d$freq,
        scale = c(7, 2),
        min.freq = setMinFreq,
        random.order = FALSE,
        ordered.colors = FALSE,
        colors = rainbow(nrow(d))
      )
    }
  })
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny)

# Left-hand column: file upload, text-column picker, frequency threshold
# and the Run button.
controlPanel <- column(
  width = 3,
  div(
    fileInput(
      inputId = "file1",
      label = "Choose CSV File",
      accept = c(
        "text/csv",
        "text/comma-separated-values,text/plain",
        ".csv"
      )
    ),
    # Starts with a single empty choice.
    selectInput(
      inputId = "fieldSelect",
      label = "請選取資料欄位 : ",
      choices = ""
    ),
    sliderInput(
      inputId = "minfreq",
      label = "Minimum Frequency:",
      min = 1, max = 50, value = 5
    ),
    actionButton(inputId = "run", label = "Run")
  )
)

# Right-hand column: the word-cloud plot.
plotPanel <- column(
  width = 8,
  div(
    class = "span2",
    plotOutput(outputId = "plot")
  )
)

# Page layout: title on top, controls and plot side by side.
shinyUI(fluidPage(
  titlePanel("Word Cloud"),
  fluidRow(
    controlPanel,
    plotPanel
  )
))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment