Skip to content

Instantly share code, notes, and snippets.

@kashitan
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kashitan/9427692 to your computer and use it in GitHub Desktop.
Save kashitan/9427692 to your computer and use it in GitHub Desktop.
@sbcare宛のTweetを形態素解析してShinyで眺めるコード
library(shiny)
library(twitteR)
library(RMeCab)
library(wordcloud)
# Set R's default `encoding` option to UTF-8 for this session.
options(encoding='UTF-8')
# Switch every locale category (LC_ALL) to the Japanese UTF-8 locale.
# NOTE(review): presumably needed so the Japanese tweet text is handled
# correctly; the "ja_JP.UTF-8" locale must exist on the host — confirm.
Sys.setlocale("LC_ALL", "ja_JP.UTF-8")
# Decode the HTML-escaped ampersand ("&amp;") in tweet text and flag text that
# cannot be processed.
#
# The original author observed that gsub() raises an error for tweets that
# contain emoji; such input is turned into NA so the caller can drop it.
#
# NOTE(review): the previous pattern and replacement were both "&" (a no-op);
# presumably the "&amp;" entity was lost when the page was rendered — confirm.
#
# x:       character text of a tweet.
# returns: `x` with every "&amp;" replaced by "&", or NA if gsub() fails.
remove.emoji <- function(x) {
  tryCatch(
    gsub("&amp;", "&", x, fixed = TRUE),
    error = function(e) NA
  )
}
# Shiny server logic: searches tweets that mention @SBCare over the requested
# date range, cleans the text, and exposes three outputs: the tweet table, a
# noun frequency table built with RMeCab, and a word cloud of those nouns.
shinyServer(function(input, output) {
  ## Load the saved OAuth credentials (object `twitCred`) and register them
  load("twitteR_cred")
  registerTwitterOAuth(twitCred)

  # Cleaned tweets for the requested period (reactive on since/until/n).
  tweets <- reactive({
    # One search per day: day i is queried as [dl[i], dl[i + 1])
    dl <- seq(as.Date(input$since), as.Date(input$until), by = "days")
    if (length(dl) < 2) {
      # `1:(length(dl) - 1)` used to yield c(1, 0) here and index dl[0]/dl[2]
      stop("'until' must be at least one day after 'since'", call. = FALSE)
    }

    daily <- lapply(seq_len(length(dl) - 1), function(i) {
      st <- searchTwitter("@SBCare",
                          since = as.character(dl[i]),
                          until = as.character(dl[i + 1]),
                          n = input$n, lang = "ja", locale = "ja")
      # Skip days without results instead of handing an empty list to
      # twListToDF(); rbind() ignores the NULL entries below.
      if (length(st) == 0) {
        return(NULL)
      }
      twListToDF(st)
    })
    # Combine once rather than growing the data frame inside the loop
    df <- do.call(rbind, daily)
    if (is.null(df) || nrow(df) == 0) {
      stop("No tweets were found for the requested period", call. = FALSE)
    }

    # Convert `created` to character
    df$created <- as.character(df$created)
    # Strip @mentions (including @SBCare itself)
    df$text <- gsub("@[0-9a-zA-Z_]+\\s*", "", df$text)
    # Drop retweets and keep columns 1, 5 and 11
    # NOTE(review): presumably text / created / screenName in twListToDF()
    # output — positional indices depend on the twitteR version; confirm.
    df <- df[which(df$isRetweet == FALSE), c(1, 5, 11)]
    # Strip t.co short URLs
    df$text <- gsub("https?://t.co/[0-9a-zA-Z\\._]*", "", df$text)
    # remove.emoji() yields NA for text it cannot process; drop those tweets
    df$text <- vapply(df$text,
                      function(txt) as.character(remove.emoji(txt)),
                      character(1), USE.NAMES = FALSE)
    df <- df[!is.na(df$text), ]
    # Sort by screenName, created (disabled in the original)
    #df <- df[order(-df$created), ]
    df
  })

  # Noun frequency table derived from the cleaned tweets. Being a reactive
  # (instead of a variable assigned with `<<-`), every output that uses it is
  # refreshed whenever a new search is run.
  tweets.freq <- reactive({
    df <- tweets()

    # RMeCabFreq() reads from a file, so write the text to a temporary file
    # that is removed even if the analysis fails.
    filename <- tempfile(pattern = "tweet_", fileext = ".txt")
    on.exit(unlink(filename), add = TRUE)
    write.table(df$text,
                file = filename,
                row.names = FALSE,
                col.names = FALSE,
                fileEncoding = "UTF-8",
                quote = FALSE)

    # Build the frequency table
    freq <- RMeCabFreq(filename)
    # Keep nouns only
    freq <- freq[freq$Info1 == "名詞", ]
    # Remove numeral, non-independent and suffix nouns
    freq <- freq[!freq$Info2 %in% c("数", "非自立", "接尾"), ]
    # Keep terms longer than two characters.
    # NOTE(review): the original comment said "two or more characters",
    # which would be `>= 2` — confirm the intended threshold.
    freq <- freq[nchar(freq$Term) > 2, ]
    # Sort by descending frequency
    freq[order(-freq$Freq), ]
  })

  # Tweet list
  output$tweets <- renderTable({
    tweets()
  })

  # Word cloud
  output$wordcloud <- renderPlot({
    freq <- tweets.freq()
    pal2 <- brewer.pal(8, "Dark2")
    wordcloud(freq$Term, freq$Freq, scale = c(10, .5),
              min.freq = input$min.freq, colors = pal2)
  })

  # Frequency table
  output$freq <- renderTable({
    tweets.freq()
  })

  # Session information (disabled)
  #output$sessionInfo <- renderPrint({
  # sessionInfo()
  # capabilities()
  #})
})
library(shiny)
# Use UTF-8 as R's default encoding option for the UI script.
options(encoding = "UTF-8")

# Reference date used to pre-fill the search period inputs.
today <- Sys.Date()

# UI for the @SBCare tweet morphological-analysis app: a sidebar with the
# search / word cloud settings and a main panel with one tab per output.
shinyUI(pageWithSidebar(
  # Application title
  headerPanel(title = "@SBCare Morphological Analysis"),

  # Sidebar: search parameters and word cloud threshold
  sidebarPanel(
    h3("TwitteR"),
    sliderInput(inputId = "n", label = "n:",
                min = 1, max = 1000, value = 500),
    # Default period: the last seven days up to today
    textInput(inputId = "since", label = "since:", value = today - 7),
    textInput(inputId = "until", label = "until:", value = today),
    br(),
    h3("Wordcloud"),
    sliderInput(inputId = "min.freq", label = "min.freq:",
                min = 1, max = 50, value = 5),
    br(),
    # Inputs are only sent to the server when this button is pressed
    submitButton(text = "Get Tweets")
  ),

  # Main panel: one tab per server output
  mainPanel(
    tabsetPanel(
      tabPanel(title = "Tweets", tableOutput(outputId = "tweets")),
      tabPanel(title = "WordCloud", plotOutput(outputId = "wordcloud")),
      tabPanel(title = "Frequency", tableOutput(outputId = "freq")) #,
      #tabPanel("SessionInfo", verbatimTextOutput("sessionInfo"))
    )
  )
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment