Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Created November 29, 2013 22:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chrishanretty/7713010 to your computer and use it in GitHub Desktop.
Save chrishanretty/7713010 to your computer and use it in GitHub Desktop.
Classifier for Times headlines
### Classifier for Times headlines.
### Reads the media CSV, builds a TF-IDF document-term matrix from the
### Title and Subtitle columns, trains SVM and MAXENT models to predict
### Major_Topic, and writes the RTextTools analytics summaries to CSV.

### Load libraries
library(RTextTools)

### Load the data ----
### Make sure .xlsx file has been converted to CSV properly
media <- read.csv(
  "media1960-2008_websiteversion_111007.csv",
  header = TRUE, as.is = TRUE
)

# Fail early, with a clear message, if the CSV conversion lost a column
# that the rest of the script relies on.
required_cols <- c("Date", "Title", "Subtitle", "Major_Topic")
stopifnot(all(required_cols %in% names(media)))

# Dates are stored as day/month/year text; keep only rows dated strictly
# after 1 January 1996 (rows whose date fails to parse are dropped by
# subset()).
media$Date <- as.Date(media$Date, format = "%d/%m/%Y")
media <- subset(media, Date > as.Date("1996-01-01"))
media <- media[, c("Title", "Subtitle", "Major_Topic")]

### Build the document-term matrix ----
# Title and Subtitle are passed together as the text columns.
# weightTfIdf lives in the tm package; it is referenced as tm::weightTfIdf
# so the script does not depend on tm being attached to the search path.
media_matrix <- create_matrix(
  media[, c("Title", "Subtitle")],
  language = "english", removeNumbers = TRUE,
  stemWords = TRUE, weighting = tm::weightTfIdf
)

### Split into training and test sets ----
# The first `train_size` rows are used for training and all remaining
# rows for testing.
# NOTE(review): the split is positional, not random -- confirm that the
# row order of the CSV makes this an appropriate train/test split.
train_size <- 5700L

# Without this guard, (train_size + 1):nrow(media) would count backwards
# when there are too few rows and yield invalid test indices.
stopifnot(nrow(media) > train_size)

corpus <- create_container(
  media_matrix, media$Major_Topic,
  trainSize = seq_len(train_size),
  testSize = (train_size + 1L):nrow(media),
  virgin = FALSE
)
print(names(attributes(corpus))) # class matrix_container

### Train and classify ----
models <- train_models(corpus, algorithms = c("SVM", "MAXENT"))
results <- classify_models(corpus, models)

##########################################
# VIEW THE RESULTS BY CREATING ANALYTICS #
##########################################
analytics <- create_analytics(corpus, results)

# print() is explicit so the summaries are also shown when the script is
# run through source(), which does not auto-print top-level values.
print(head(analytics@algorithm_summary))
print(head(analytics@label_summary))
print(head(analytics@document_summary))
print(analytics@ensemble_summary)

# WRITE OUT THE DATA TO A CSV --- look in your working directory
write.csv(analytics@algorithm_summary, "times_AlgorithmSummary.csv")
write.csv(analytics@label_summary, "times_LabelSummary.csv")
write.csv(analytics@document_summary, "times_DocumentSummary.csv")
write.csv(analytics@ensemble_summary, "times_EnsembleSummary.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment