Skip to content

Instantly share code, notes, and snippets.

@jeffwong
Last active August 29, 2015 14:10
Show Gist options
  • Save jeffwong/735f2d644bf64b1c302d to your computer and use it in GitHub Desktop.
Save jeffwong/735f2d644bf64b1c302d to your computer and use it in GitHub Desktop.
Convert R data structures to text files for VW, FM, LIBSVM
#' Write a numeric table as a LIBSVM-style sparse text file
#'
#' Each row of `data` becomes one line of the form
#' `label index:value index:value ...`, listing only the non-zero entries.
#' Feature indices are the original column positions in `data` (the label
#' column keeps its position and is simply skipped).
#'
#' @param data A matrix or data frame; it is coerced with `as.matrix()`.
#' @param filename Path of the file to write.
#' @param class Position (or column name) of the label column.
#' @return `NULL`, invisibly. Called for the side effect of writing `filename`.
write.libsvm <- function(data, filename = "out.dat", class = 1) {
  mat <- as.matrix(data)
  if (is.character(class)) {
    class <- match(class, colnames(mat))
  }
  stopifnot(
    length(class) == 1L, !is.na(class),
    class >= 1, class <= ncol(mat)
  )

  lines <- vapply(seq_len(nrow(mat)), function(i) {
    row <- mat[i, ]
    # Exclude the label by column position. Removing the class-th *nonzero*
    # entry instead would drop a real feature whenever the label is 0.
    idx <- which(row != 0)
    idx <- idx[idx != class]
    # Build all tokens first and collapse once, so the label appears exactly
    # one time at the start of the line rather than before every feature.
    paste(c(row[[class]], paste(idx, row[idx], sep = ":")), collapse = " ")
  }, character(1))

  # writeLines opens and closes the file itself when given a path, so no
  # connection is left open if something fails.
  writeLines(lines, filename)
}
#' Write a data frame as a Vowpal Wabbit text file
#'
#' Each row becomes one line: `label [weight] | feat feat:value ...`.
#' Every column other than the outcome and weight columns is treated as a
#' predictor. A predictor equal to 1 is written as its bare column name, a
#' predictor equal to 0 (or `NA`) is omitted, and any other value is written
#' as `name:value`.
#'
#' @param data A data frame (other inputs are passed through
#'   `as.data.frame()`).
#' @param outcomeName Name of the column holding the label.
#' @param weightName Name of the column holding the importance weight, or
#'   `''` for none.
#' @param classification If `TRUE`, labels are recoded to 1 (outcome > 0)
#'   or -1 (otherwise).
#' @param file Path of the file to write.
#' @return `NULL`, invisibly. Called for the side effect of writing `file`.
write.vw <- function(data, outcomeName, weightName = '', classification = FALSE, file = 'vw.txt') {
  if (!is.data.frame(data)) {
    data <- as.data.frame(data)
  }
  if (!outcomeName %in% names(data)) {
    stop("outcome column '", outcomeName, "' not found in data", call. = FALSE)
  }
  has_weight <- !is.null(weightName) && nzchar(weightName)
  if (has_weight && !weightName %in% names(data)) {
    stop("weight column '", weightName, "' not found in data", call. = FALSE)
  }
  predictors <- names(data)[!names(data) %in% c(outcomeName, weightName)]

  # LABELS & IMPORTANCE
  label <- data[[outcomeName]]
  if (classification) {
    # everything should be -1,1 for classification
    label <- ifelse(label > 0, 1, -1)
  }
  prefix <- if (has_weight) {
    paste(label, data[[weightName]], "|")
  } else {
    paste(label, "|")
  }

  # Pair column names with data; each emitted token carries its own leading
  # blank so the pieces can be concatenated without a separator.
  tokens <- lapply(predictors, function(name) {
    value <- data[[name]]
    token <- ifelse(value == 1, paste0(' ', name),
                    ifelse(value == 0, '', paste0(' ', name, ':', value)))
    # A missing value would otherwise be written as the literal text "NA"
    # glued onto the previous token; treat it as an absent feature instead.
    token[is.na(token)] <- ''
    token
  })

  # Label (and weight) first, followed by the predictors in column order.
  lines <- do.call(paste0, c(list(prefix), tokens))
  writeLines(lines, file)
}
#' Write a numeric table for FM input
#'
#' Thin wrapper that delegates to `write.libsvm()`, so the output file has
#' exactly the same layout (`label index:value ...`).
#' NOTE(review): "FM" presumably refers to a factorization-machine tool such
#' as libFM; confirm the expected index base against that tool.
#'
#' @param data A matrix or data frame, passed through to `write.libsvm()`.
#' @param filename Path of the file to write.
#' @param class Position of the label column.
#' @return Whatever `write.libsvm()` returns.
write.fm <- function(data, filename = "out.dat", class = 1) {
  # The opening brace was missing here, which left the closing brace
  # unmatched and made the whole file fail to parse.
  write.libsvm(data, filename, class)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment