Last active
August 29, 2015 13:57
-
-
Save kashitan/9427692 to your computer and use it in GitHub Desktop.
@SBCare宛のTweetを形態素解析してShinyで眺めるコード
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny) | |
library(twitteR) | |
library(RMeCab) | |
library(wordcloud) | |
# Use UTF-8 as the default encoding for connections and source files.
options(encoding = "UTF-8")
# Switch every locale category to Japanese UTF-8.
Sys.setlocale(category = "LC_ALL", locale = "ja_JP.UTF-8")
# Decode the HTML-escaped ampersand ("&amp;") in tweet text.
#
# Tweets containing emoji make the replacement fail with an error; such input
# is mapped to NA so the caller can drop the affected tweets.
#
# NOTE(review): the published listing showed the pattern as "&" (a no-op
# replacement), presumably because the entity was decoded when the page was
# rendered. "&amp;" is restored here -- confirm against the original file.
#
# x: character vector of tweet text.
# Returns: `x` with every "&amp;" replaced by "&", or a single NA when the
#          replacement raises an error.
remove.emoji <- function(x) {
  tryCatch(
    gsub("&amp;", "&", x, fixed = TRUE),
    error = function(e) NA_character_
  )
}
# Shiny server logic: search tweets addressed to @SBCare for the submitted
# date range, clean their text, and publish three outputs -- the tweet table,
# a noun frequency table and a word cloud drawn from that frequency table.
shinyServer(function(input, output) {
  ## Load the saved OAuth credentials (the code expects the file to provide an
  ## object named `twitCred`) and register them with twitteR.
  load("twitteR_cred")
  registerTwitterOAuth(twitCred)

  # Build the noun frequency table for a character vector of tweet texts.
  # RMeCabFreq() reads from a file, so the texts are written to a temporary
  # file that is removed again when this helper exits, even on error.
  noun.freq <- function(texts) {
    tmp <- tempfile(pattern = "tweet_", fileext = ".txt")
    on.exit(unlink(tmp), add = TRUE)
    write.table(texts,
                file = tmp,
                row.names = FALSE,
                col.names = FALSE,
                fileEncoding = "UTF-8",
                quote = FALSE)
    freq <- RMeCabFreq(tmp)
    # Keep nouns only.
    freq <- freq[freq$Info1 == "名詞", ]
    # Drop numeral, non-independent and suffix nouns.
    freq <- freq[!freq$Info2 %in% c("数", "非自立", "接尾"), ]
    # NOTE(review): the original comment said "2 or more characters", but
    # `> 2` keeps only terms of 3+ characters. Behaviour left unchanged --
    # confirm which was intended.
    freq <- freq[nchar(freq$Term) > 2, ]
    # Most frequent terms first.
    freq[order(-freq$Freq), ]
  }

  # Cleaned tweets for the currently submitted inputs.
  tweets <- reactive({
    # Search one day at a time: each window runs from one date to the next.
    dl <- seq(as.Date(input$since), as.Date(input$until), by = "days")
    # seq_len() produces no windows when since == until, whereas
    # 1:(length(dl) - 1) would have iterated over c(1, 0).
    daily <- lapply(seq_len(length(dl) - 1), function(i) {
      st <- searchTwitter("@SBCare",
                          since = as.character(dl[i]),
                          until = as.character(dl[i + 1]),
                          n = input$n, lang = "ja", locale = "ja")
      if (length(st) == 0) {
        return(NULL)
      }
      twListToDF(st)
    })
    # Combine the per-day results in one step instead of growing a data
    # frame inside the loop.
    df <- do.call(rbind, Filter(Negate(is.null), daily))
    if (is.null(df) || nrow(df) == 0) {
      stop("No tweets found between ", input$since, " and ", input$until,
           call. = FALSE)
    }

    # Convert `created` to character.
    df$created <- as.character(df$created)
    # Strip @mentions (including @SBCare itself) from the text.
    df$text <- gsub("@[0-9a-zA-Z_]+\\s*", "", df$text)
    # Drop retweets and keep columns 1, 5 and 11.
    # NOTE(review): presumably text, created and screenName in the
    # twListToDF() column layout -- confirm for the installed twitteR version.
    df <- df[which(df$isRetweet == FALSE), c(1, 5, 11)]
    # Remove t.co short URLs.
    df$text <- gsub("https?://t.co/[0-9a-zA-Z\\._]*", "", df$text)
    # Tweets that remove.emoji() cannot process come back as NA and are
    # dropped. as.character() keeps the column type stable even when every
    # element is NA or there are no rows left.
    cleaned <- lapply(df$text, remove.emoji)
    df$text <- as.character(unlist(cleaned, use.names = FALSE))
    df <- df[!is.na(df$text), ]
    # Sorting by screenName/created is currently disabled.
    # df <- df[order(-df$created), ]
    df
  })

  # Noun frequency table derived from the current tweets. Being a reactive
  # (instead of a variable assigned with `<<-` as a side effect of tweets()),
  # it makes the frequency table and word cloud re-render whenever the tweets
  # change.
  tweets.freq <- reactive({
    df <- tweets()
    if (nrow(df) == 0) {
      stop("No tweet text left to analyse after filtering", call. = FALSE)
    }
    noun.freq(df$text)
  })

  # Tweet list.
  output$tweets <- renderTable({
    tweets()
  })

  # Word cloud.
  output$wordcloud <- renderPlot({
    freq <- tweets.freq()
    pal2 <- brewer.pal(8, "Dark2")
    wordcloud(freq$Term, freq$Freq, scale = c(10, .5),
              min.freq = input$min.freq, colors = pal2)
  })

  # Frequency table.
  output$freq <- renderTable({
    tweets.freq()
  })

  # Session information (disabled).
  # output$sessionInfo <- renderPrint({
  #   sessionInfo()
  #   capabilities()
  # })
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny) | |
# Use UTF-8 as the default encoding for connections and source files.
options(encoding = "UTF-8")

# Reference date for the default search range; computed once, when this
# script is evaluated.
today <- Sys.Date()

# UI for the @SBCare tweet morphological-analysis application.
shinyUI(pageWithSidebar(
  # Application title
  headerPanel("@SBCare Morphological Analysis"),

  # Search and word cloud settings; values are sent only when the
  # "Get Tweets" button is pressed.
  sidebarPanel(
    h3("TwitteR"),
    sliderInput("n", "n:",
                min = 1, max = 1000, value = 500),
    # textInput() takes a string as its initial value, so the dates are
    # formatted explicitly (YYYY-MM-DD) instead of passing Date objects.
    textInput("since", "since:", value = format(today - 7)),
    textInput("until", "until:", value = format(today)),
    br(),
    h3("Wordcloud"),
    sliderInput("min.freq", "min.freq:",
                min = 1, max = 50, value = 5),
    br(),
    submitButton("Get Tweets")
  ),

  # One tab per output: tweet table, word cloud and frequency table.
  mainPanel(
    tabsetPanel(
      tabPanel("Tweets", tableOutput("tweets")),
      tabPanel("WordCloud", plotOutput("wordcloud")),
      tabPanel("Frequency", tableOutput("freq")) # ,
      # tabPanel("SessionInfo", verbatimTextOutput("sessionInfo"))
    )
  )
))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment