Skip to content

Instantly share code, notes, and snippets.

@oholiab
Created July 10, 2013 15:35
Show Gist options
  • Save oholiab/5967334 to your computer and use it in GitHub Desktop.
Save oholiab/5967334 to your computer and use it in GitHub Desktop.
Fumblings to do SVD (singular value decomposition) plots of document-term matrix (DTM) information from RSS feeds
# ---- Fetch the BBC News RSS feed and extract its <description> nodes ----

# One-off installation of extra text-mining packages; none of them is attached
# in the visible part of this script. The CRAN Twitter client is spelled
# "twitteR" (the original "twitterR" does not name a package).
install.packages(c("RTextTools", "topicmodels", "twitteR"))
library("XML")

# Parse the feed straight from its URL and show the parsed document.
doc <- xmlTreeParse("http://feeds.bbci.co.uk/news/rss.xml")
doc

# In XPath a bare "description" step only matches direct children of the
# context node, whereas "//description" matches at any depth.
xpathApply(xmlRoot(doc), "description")
src <- xpathApply(xmlRoot(doc), "//description")
src

# `[` keeps the list wrapper (a one-element list); `[[` extracts the node
# itself, which is what xmlSApply() is given below.
src[1]
xmlSApply(src[[1]], xmlValue)
# ---- Build DATA: one row per <description> node ----

# Fail with a clear message when the query matched nothing, instead of the
# out-of-bounds subscript error that a `1:length(src)` loop would hit.
if (length(src) == 0) {
  stop("no <description> nodes were found in the feed", call. = FALSE)
}

# xmlSApply() applies xmlValue to each child of a node; t() lays that result
# out as a single row so every node becomes a one-row data frame.
description_rows <- lapply(src, function(node) {
  data.frame(t(xmlSApply(node, xmlValue)), stringsAsFactors = FALSE)
})

# Stack all rows in one call rather than growing DATA with rbind() on every
# iteration. unname() keeps any list names from being used as row names.
DATA <- do.call(rbind, unname(description_rows))
# ---- Inspect DATA ----

# Print once through the print() generic (DATA is a data frame, so this
# dispatches to the data-frame method), then again with width = 10 forwarded
# to the print method.
print(DATA)
print(DATA, width = 10)
# ---- Turn the descriptions into a tm corpus ----
library(tm)

# The outer parentheses make the assignment print its value, so the corpus
# summary is shown without a separate `test` line.
# NOTE(review): the `encoding` argument matches the tm release this session was
# recorded with; newer tm releases appear to have changed DataframeSource()'s
# signature and expected columns -- confirm against the installed version.
(test <- Corpus(DataframeSource(DATA, encoding = "ASCII")))
# ---- Document-term matrix ----

# Build the matrix once from the corpus and show its summary.
dtm <- DocumentTermMatrix(test)
dtm
inspect(dtm)

# Preview the top-left corner. The index ranges are clamped to the matrix
# dimensions so a feed with fewer than five documents or terms still works.
preview_docs <- seq_len(min(5, nrow(dtm)))
preview_terms <- seq_len(min(5, ncol(dtm)))
inspect(dtm[preview_docs, preview_terms])
# ---- Singular value decomposition of the document-term matrix ----

# Decompose once and keep the result; svd() returns a list whose components
# are the lower-case names d, u and v.
sv <- svd(dtm)
sv

# `$` on a list is case-sensitive: sv$U and sv$V are NULL, so the lower-case
# components must be used.
U <- sv$u
Vt <- t(sv$v)

# Diagonal matrix of singular values. nrow is given explicitly because diag()
# on a length-one vector would otherwise build an identity matrix instead.
S <- diag(sv$d, nrow = length(sv$d))
# ---- Exploratory plots of the decomposition ----

# Shapes of the factor matrices, and the labels carried by the DTM.
dim(U)
dim(Vt)
names(dtm)
rownames(dtm)
colnames(dtm)

# Documents in the plane of the first two left singular vectors.
# (U is a matrix, so it takes two subscripts: all rows, columns 1 and 2.)
plot(U[, 1:2])

# Terms in the plane of the first two right singular vectors. Vt is the
# transpose of sv$v, so the singular vectors are its ROWS; Vt[, 1:2] would
# pick out two terms rather than two components.
wordsdata <- t(Vt[1:2, , drop = FALSE])
wordsdata

# Base-graphics plot() draws as a side effect; `pl` is assigned only because
# the recorded session defined it.
pl <- plot(wordsdata)
# ---- Attach document / term labels to the factor matrices ----

# Keep an unlabelled copy of U before adding dimnames.
Ucopy <- U

# svd() gives u one row per row of its input and v one row per column of its
# input. For a document-term matrix that means U's rows are documents, and the
# columns of Vt (= t(sv$v)) are terms -- labelling either one's rows with the
# term names fails on the dimnames length check.
rownames(U) <- rownames(dtm)
colnames(Vt) <- colnames(dtm)
nrow(Vt)

# Untransposed right singular vectors: one row per term, labelled by term.
V <- sv$v
nrow(V)
rownames(V) <- colnames(dtm)

# Scatter the terms on the first two columns of V. `p` is assigned only
# because the recorded session defined it; plot() draws as a side effect.
p <- plot(V)
# ---- Label the plotted terms ----

# textxy() comes from the calibrate package, so the package has to be
# available and attached before the first call. Install only when missing.
if (!requireNamespace("calibrate", quietly = TRUE)) {
  install.packages("calibrate")
}
library(calibrate)

# Label every row of V in one call instead of looping over a hard-coded
# 1:892; the coordinates and labels are passed as whole vectors.
# Expects the scatter plot of V to be the active graphics device.
textxy(V[, 1], V[, 2], rownames(V))
# ---- Review and save the session's command history ----

# A bare `history` only prints the function's source, so it is dropped.
# The history helpers are guarded because they are meant for an interactive
# console. NOTE(review): availability also depends on the R front end in use.
if (interactive()) {
  history()
  help(history)
  savehistory(file = ".Rhistory")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment