Skip to content

Instantly share code, notes, and snippets.

@s13731105
s13731105 / RTextTools.R
Created July 31, 2014 01:05
RTextTools 練習
library(RTextTools)
library(tm)
# Load the NYTimes dataset, then keep 100 rows drawn at random (without
# replacement) from row positions 1..3100.
data(NYTimes)
data <- NYTimes[sample.int(3100, size = 100, replace = FALSE), ]

# Build the document-term matrix from the Title and Subject columns:
# English text, numbers removed, no stemming, TF-IDF weighting.
matrix <- create_matrix(
  cbind(data["Title"], data["Subject"]),
  language = "english",
  removeNumbers = TRUE,
  stemWords = FALSE,
  weighting = weightTfIdf
)
@s13731105
s13731105 / LoadData.R
Last active August 29, 2015 14:04
Load Data
library(tm)
# Set R's working directory to near where the input files are.
setwd("C:/test/001")

# Build a corpus from every file in the source directory.
# NOTE(review): "/001" starts with "/", so it is not resolved relative to the
# working directory set above -- confirm this is the intended folder.
# NOTE(review): "lat" is presumably the language code for Latin -- confirm it
# matches the documents being loaded.
a <- Corpus(
  DirSource("/001"),
  readerControl = list(language = "lat")
)
summary(a)

# Apply the cleaning passes in order: drop numbers, collapse whitespace,
# then strip punctuation.
a <- Reduce(
  function(corpus, cleaner) tm_map(corpus, cleaner),
  list(removeNumbers, stripWhitespace, removePunctuation),
  a
)
@s13731105
s13731105 / basicTM.R
Created July 25, 2014 14:05
basic Text Mining
# Download the policy page as a character vector, one element per line.
policy.HTML.page <- readLines("http://policy.unt.edu/policy/3-5")
length(policy.HTML.page)
# Print lines 186-202 to inspect the markup around the passage of interest.
policy.HTML.page[186:202]

# Find the marker line; the wanted passage starts 3 lines below it and spans
# 6 lines in total.
marker.hits <- which(policy.HTML.page == " TOTAL UNIVERSITY </div>")
if (length(marker.hits) == 0L) {
  # Without this guard the `:` below fails with an opaque
  # "argument of length 0" error when the page layout has changed.
  stop(
    "Marker line ' TOTAL UNIVERSITY </div>' not found in the downloaded page.",
    call. = FALSE
  )
}
# Use the first match explicitly (`:` would otherwise silently-with-a-warning
# use only the first element if the marker occurred more than once).
id.1 <- 3 + marker.hits[1L]
id.2 <- id.1 + 5
text.data <- policy.HTML.page[id.1:id.2]

# Drop the temporaries so only the extracted text remains in the workspace.
rm(policy.HTML.page, marker.hits, id.1, id.2)
text.data
@s13731105
s13731105 / Rwordseg.R
Last active June 2, 2016 22:12
用 R 進行中文 text Mining
library(XML)
library(RCurl)
library(tm)
library(tmcn)
library(Rwordseg)
setwd("C:/test")
# Read the local text files under /test/doc into a corpus.
# "UTF-8" is a character encoding, not a language, so it is passed to
# DirSource()'s `encoding` argument (which controls how the files are decoded)
# instead of readerControl$language (which is only a language tag stored with
# each document).
# NOTE(review): language = "zh" assumes the documents are Chinese, as the gist
# title suggests -- confirm.
d.corpus <- Corpus(
  DirSource("/test/doc", encoding = "UTF-8"),
  readerControl = list(language = "zh")
)
# The two statements above load local text files; everything else is the same
# as the explanation published online.
@s13731105
s13731105 / twitter_SA.R
Created July 25, 2014 13:54
Twitter Sentiment Analysis
library(twitteR)
library(ROAuth)
library(ggplot2)
# Search Twitter for the #kejriwal hashtag, requesting 1000 tweets.
kejriwal.list <- searchTwitter("#kejriwal", n = 1000)

# twListToDF() takes a list of objects from a single twitteR class and returns
# a data.frame version of the members.
kejriwal.df <- twListToDF(kejriwal.list)

# Save the tweets as CSV without a row-name column. FALSE is spelled out
# because the shorthand `F` is an ordinary variable that can be reassigned.
write.csv(kejriwal.df, file = "c:/test/kejriwal.csv", row.names = FALSE)
@s13731105
s13731105 / sentimet_install.R
Created July 25, 2014 13:46
install package "sentiment"
# Install the archived "sentiment" package from its CRAN archive tarball.
# library() is used instead of require() so that a missing package stops the
# script immediately, rather than returning FALSE and failing later with a
# "could not find function" error.
library(devtools)
install_url("http://cran.r-project.org/src/contrib/Archive/sentiment/sentiment_0.2.tar.gz")

# Attach the freshly installed package and list the objects it exports.
library(sentiment)
ls("package:sentiment")
@s13731105
s13731105 / twittwr auth.R
Created July 25, 2014 13:41
Twitter Authentication with R
library(devtools)
library(twitteR)
# Twitter API credentials.
# These are read from environment variables (e.g. set in ~/.Renviron) instead
# of being written into the script, so the secrets are never committed or
# published together with the code. Each variable is a length-one character
# string; Sys.getenv() yields "" for a variable that is not set.
api_key <- Sys.getenv("TWITTER_API_KEY")
api_secret <- Sys.getenv("TWITTER_API_SECRET")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET")

# Warn early if any credential is missing, since authentication would
# otherwise fail later with a less obvious error.
local({
  credentials <- c(
    TWITTER_API_KEY = api_key,
    TWITTER_API_SECRET = api_secret,
    TWITTER_ACCESS_TOKEN = access_token,
    TWITTER_ACCESS_TOKEN_SECRET = access_token_secret
  )
  unset <- names(credentials)[!nzchar(credentials)]
  if (length(unset) > 0L) {
    warning(
      "Twitter credential environment variable(s) not set: ",
      paste(unset, collapse = ", "),
      call. = FALSE
    )
  }
})