Created
July 31, 2014 01:05
-
-
Save s13731105/351c866c181fbcb91489 to your computer and use it in GitHub Desktop.
RTextTools 練習
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# RTextTools practice: train several classifiers on a random sample of the
# NYTimes headline data set, evaluate them on a held-out test split,
# cross-validate a few algorithms, and write the summaries to CSV.

library(RTextTools)
library(tm)

# Load data ----
data(NYTimes)
# Draw 100 random rows; use the real row count rather than a hard-coded bound.
data <- NYTimes[sample(seq_len(nrow(NYTimes)), size = 100, replace = FALSE), ]

# Build the document-term matrix ----
# removeSparseTerms can also be set here to drop rare terms.
matrix <- create_matrix(
  cbind(data["Title"], data["Subject"]),
  language = "english",
  removeNumbers = TRUE,
  stemWords = FALSE,
  weighting = weightTfIdf
)

# Split the document-term matrix into training and test sets ----
# virgin = FALSE because the test rows (76:100) DO have known labels in
# data$Topic.Code. With virgin = TRUE, create_analytics() returns an
# analytics_virgin object that has no algorithm_summary / ensemble_summary
# slots, which is why those slots could not be found below.
container <- create_container(
  matrix,
  data$Topic.Code,
  trainSize = 1:75,
  testSize = 76:100,
  virgin = FALSE
)

# Train models ----
models <- train_models(
  container,
  algorithms = c(
    "MAXENT", "SVM",
    "GLMNET", "BOOSTING",
    # "SLDA", "BAGGING",
    "RF", "NNET",
    "TREE"
  )
)

# Classify the test set ----
results <- classify_models(container, models)

# Analyze results ----
analytics <- create_analytics(container, results)
summary(analytics)

# Inspect algorithm accuracy ----
# analytics@algorithm_summary: precision, recall, F-scores and accuracy,
#   sorted by topic code, for each algorithm
# analytics@label_summary: label (e.g. topic) accuracy
# analytics@document_summary: per-document predictions
# analytics@ensemble_summary: ensemble precision/coverage; uses the n
#   argument passed into create_analytics()
head(analytics@algorithm_summary)
head(analytics@label_summary)
head(analytics@document_summary)

# Ensemble agreement ----
# create_ensembleSummary() expects the document_summary slot, not
# ensemble_summary.
create_ensembleSummary(analytics@document_summary)

# Cross-validation ----
N <- 3
cross_SVM <- cross_validate(container, N, "SVM")
cross_GLMNET <- cross_validate(container, N, "GLMNET")
cross_MAXENT <- cross_validate(container, N, "MAXENT")

# Write out results ----
# Build explicit paths instead of changing the working directory with setwd().
output_dir <- "c:/test"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}
# The two commented-out exports become available once virgin = FALSE.
# write.csv(analytics@algorithm_summary,
#           file.path(output_dir, "SampleData_AlgorithmSummary.csv"))
write.csv(
  analytics@label_summary,
  file.path(output_dir, "SampleData_LabelSummary.csv")
)
write.csv(
  analytics@document_summary,
  file.path(output_dir, "SampleData_DocumentSummary.csv")
)
# write.csv(analytics@ensemble_summary,
#           file.path(output_dir, "SampleData_EnsembleSummary.csv"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment