Last active
August 29, 2015 14:10
-
-
Save jeffwong/735f2d644bf64b1c302d to your computer and use it in GitHub Desktop.
Convert R data structures to text files for VW, FM, LIBSVM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Write a numeric matrix to a sparse LIBSVM-style text file
#'
#' Each row of `data` becomes one line of the form
#' `label index:value index:value ...`, listing only the nonzero entries.
#' Feature indices are the original column positions in `data` (the label
#' column is skipped, not renumbered). Entries that are `NA` are omitted
#' because `which()` ignores them. A row with no nonzero features is written
#' as the bare label.
#'
#' @param data Matrix, or an object `as.matrix()` can convert, with one
#'   example per row.
#' @param filename Path of the file to write.
#' @param class Integer position of the label column.
#' @return `NULL`, invisibly; called for its side effect of writing `filename`.
write.libsvm <- function(data, filename = "out.dat", class = 1) {
  data <- as.matrix(data)

  format_row <- function(i) {
    X <- data[i, ]
    # Drop the label column by its column index. Indexing the which() result
    # with [-class] removed the class-th *nonzero* entry instead, which lost a
    # real feature whenever the label was 0 or `class` was not 1.
    nonzero <- setdiff(which(X != 0), class)
    pairs <- if (length(nonzero) > 0) {
      paste(nonzero, X[nonzero], sep = ":")
    } else {
      character(0)
    }
    # Combine into one vector before collapsing so the label appears exactly
    # once; paste(label, pairs, collapse = " ") would repeat it per feature.
    paste(c(X[class], pairs), collapse = " ")
  }

  lines <- vapply(seq_len(nrow(data)), format_row, character(1))
  # Passing the path lets writeLines open and close the connection itself,
  # so nothing is left open if writing fails.
  writeLines(lines, filename)
  invisible(NULL)
}
#' Write a data frame to a Vowpal Wabbit text file
#'
#' Each row becomes one line of the form `label [weight] | features`, where
#' every predictor column contributes one token:
#' a value of 1 is written as ` name`, a value of 0 is omitted, and any other
#' value is written as ` name:value`. Missing (`NA`) predictor values are
#' omitted as well, the same as 0.
#'
#' @param data Data frame (or an object `as.data.frame()` can convert, such as
#'   a matrix with column names) holding the outcome, the optional weight and
#'   the predictor columns.
#' @param outcomeName Name of the outcome (label) column.
#' @param weightName Name of the importance-weight column, or `''` for none.
#' @param classification If `TRUE`, outcomes greater than 0 are written as 1
#'   and all other outcomes as -1.
#' @param file Destination passed to `write.table()`.
#' @return `NULL`, invisibly; called for its side effect of writing `file`.
write.vw <- function(data, outcomeName, weightName = '',
                     classification = FALSE, file = 'vw.txt') {
  objDF <- as.data.frame(data)
  stopifnot(outcomeName %in% names(objDF))
  predictors <- names(objDF)[!names(objDF) %in% c(outcomeName, weightName)]

  # LABELS & IMPORTANCE
  label <- objDF[, outcomeName]
  if (classification) {
    # everything should be -1,1 for classification
    label <- ifelse(label > 0, 1, -1)
  }
  prefix <- if (weightName != '') {
    paste(label, objDF[, weightName], "|")
  } else {
    paste(label, "|")
  }

  # Pair column names with data, adding 1 blank character before each token.
  feature_tokens <- lapply(predictors, function(nm) {
    value <- objDF[, nm]
    token <- ifelse(value == 1, paste0(' ', nm),
                    ifelse(value == 0, '', paste0(' ', nm, ':', value)))
    # An NA value makes ifelse() return NA, which write.table() would print
    # as the literal "NA" glued to the previous token; omit it instead.
    token[is.na(token)] <- ''
    token
  })

  # Label (and weight) first, followed by the predictors in column order.
  lines <- do.call(paste0, c(list(prefix), feature_tokens))
  write.table(lines, file, sep = "", quote = FALSE,
              row.names = FALSE, col.names = FALSE)
  invisible(NULL)
}
#' Write a numeric matrix to a libFM text file
#'
#' Thin wrapper that forwards all arguments to `write.libsvm()`, so the
#' output is the same sparse `label index:value ...` layout.
#'
#' @param data Matrix with one example per row.
#' @param filename Path of the file to write.
#' @param class Integer position of the label column.
#' @return Whatever `write.libsvm()` returns.
write.fm <- function(data, filename = "out.dat", class = 1) {
  # The opening brace was missing, leaving the closing brace as a stray token.
  write.libsvm(data, filename, class)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment