Skip to content

Instantly share code, notes, and snippets.

@s13731105
Created July 31, 2014 01:05
Show Gist options
  • Save s13731105/351c866c181fbcb91489 to your computer and use it in GitHub Desktop.
Save s13731105/351c866c181fbcb91489 to your computer and use it in GitHub Desktop.
RTextTools 練習
library(RTextTools)
library(tm)
# Load the labelled New York Times headline data set shipped with RTextTools.
data(NYTimes)

# Draw a random sample of documents (without replacement) to keep the
# exercise fast. The first `n_train` sampled rows are used for training and
# the remaining rows for testing.
# NOTE(review): no seed is set, so each run samples different rows; call
# set.seed() first if reproducible results are needed.
n_docs <- 100L
n_train <- 75L
nyt_sample <- NYTimes[
  sample(seq_len(nrow(NYTimes)), size = n_docs, replace = FALSE),
]

# Build the document-term matrix from the Title and Subject columns,
# weighted by TF-IDF.
# (create_matrix() also accepts a removeSparseTerms argument if the matrix
# needs to be thinned out.)
doc_matrix <- create_matrix(
  cbind(nyt_sample["Title"], nyt_sample["Subject"]),
  language = "english",
  removeNumbers = TRUE,
  stemWords = FALSE,
  weighting = weightTfIdf
)

# Split the document-term matrix into a training set and a test set.
# virgin = FALSE because every sampled document carries a known Topic.Code.
# With virgin = TRUE the analytics are built for unlabelled data and the
# algorithm_summary / ensemble_summary slots used below are not available.
container <- create_container(
  doc_matrix,
  nyt_sample$Topic.Code,
  trainSize = seq_len(n_train),
  testSize = (n_train + 1L):n_docs,
  virgin = FALSE
)

# Train one model per algorithm.
models <- train_models(
  container,
  algorithms = c(
    "MAXENT", "SVM",
    "GLMNET", "BOOSTING",
    # "SLDA", "BAGGING",
    "RF", "NNET",
    "TREE"
  )
)

# Classify the test set with every trained model.
results <- classify_models(container, models)

# Summarise the classification results.
analytics <- create_analytics(container, results)
summary(analytics)

# Inspect the accuracy of the algorithms.
# analytics@algorithm_summary: summary of precision, recall, F-scores, and
#   accuracy sorted by topic code for each algorithm
# analytics@label_summary: summary of label (e.g. topic) accuracy
# analytics@ensemble_summary: summary of ensemble precision/coverage; uses
#   the n variable passed into create_analytics()
head(analytics@algorithm_summary)
head(analytics@label_summary)
head(analytics@document_summary)

# Ensemble agreement: coverage and recall at each level of agreement between
# the algorithms. create_ensembleSummary() takes the per-document summary,
# not the already aggregated ensemble_summary slot.
create_ensembleSummary(analytics@document_summary)
# N-fold cross-validation of three of the algorithms on the container.
N <- 3
cross_SVM <- cross_validate(container, N, "SVM")
cross_GLMNET <- cross_validate(container, N, "GLMNET")
cross_MAXENT <- cross_validate(container, N, "MAXENT")
# Write the result summaries to CSV files.
# The files go to an explicit output directory rather than changing the
# session's working directory with setwd(); the directory is created when it
# does not exist yet.
output_dir <- "c:/test"
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# The algorithm and ensemble summaries are left disabled, as in the original
# script; those slots only exist when the analytics come from labelled
# (non-virgin) data.
# write.csv(
#   analytics@algorithm_summary,
#   file.path(output_dir, "SampleData_AlgorithmSummary.csv")
# )
write.csv(
  analytics@label_summary,
  file.path(output_dir, "SampleData_LabelSummary.csv")
)
write.csv(
  analytics@document_summary,
  file.path(output_dir, "SampleData_DocumentSummary.csv")
)
# write.csv(
#   analytics@ensemble_summary,
#   file.path(output_dir, "SampleData_EnsembleSummary.csv")
# )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment