Skip to content

Instantly share code, notes, and snippets.

@chathurawidanage
Created August 21, 2016 07:09
Show Gist options
  • Save chathurawidanage/eac0b346ecd0842171d29ab723063d59 to your computer and use it in GitHub Desktop.
Save chathurawidanage/eac0b346ecd0842171d29ab723063d59 to your computer and use it in GitHub Desktop.
import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
 * Generates an R script that, for every non-empty combination of the candidate
 * key columns, computes the mean of {@code Demanda_uni_equil} grouped by that
 * combination, merges the mean back into the {@code train} table and prints its
 * correlation with {@code Demanda_uni_equil}.
 *
 * @author Chathura Widanage
 */
public class GenerateR {
    /** Candidate grouping columns; every non-empty subset of these is written to the script. */
    private static final String[] COLUMNS = {"Cliente_ID", "Producto_ID", "Agencia_ID", "Ruta_SAK", "Canal_ID"};

    /** Maximum number of leading characters taken from each column name when building a variable name. */
    private static final int NAME_PREFIX_LENGTH = 3;

    /**
     * Writes the generated R script to the hard-coded output file.
     *
     * @param args unused
     * @throws IOException if the script file cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        String pathToTrainCSV = "F:\\\\engineering\\\\Semester 7_8\\\\ML\\\\keggle\\\\train.csv";//4 backward slashes required
        File rScript = new File("D:\\cor.R");//file to save the generated R script
        // try-with-resources closes the writer even if script generation throws
        try (PrintWriter pw = new PrintWriter(new FileWriter(rScript))) {
            writeScript(pw, pathToTrainCSV);
            // PrintWriter never throws on write failures; checkError() flushes and reports them
            if (pw.checkError()) {
                throw new IOException("Failed to write R script to " + rScript);
            }
        }
    }

    /**
     * Emits the complete R script: the data-loading preamble followed by one
     * aggregate/merge/correlate/cleanup section per non-empty column subset.
     */
    private static void writeScript(PrintWriter pw, String pathToTrainCSV) {
        pw.println("library(data.table)");
        pw.println("print('Reading data')");
        pw.printf("train <-fread('%s',select = c('Cliente_ID', 'Producto_ID', 'Agencia_ID', 'Ruta_SAK', 'Demanda_uni_equil','Canal_ID'))\n"
                , pathToTrainCSV);

        // each mask in [1, 2^n) encodes one non-empty subset of COLUMNS
        int subsetCount = 1 << COLUMNS.length;
        for (int mask = 1; mask < subsetCount; mask++) {
            List<String> selectedColumns = selectColumns(mask);
            String varName = getMeanVarName(selectedColumns);
            pw.printf("%s <- train[, .(%s = mean(Demanda_uni_equil)), by = .(%s)]\n"
                    , varName, varName, getColString(selectedColumns, false));
            pw.printf("train <- merge(train, %s, all.x = TRUE, by = c(%s))\n"
                    , varName, getColString(selectedColumns, true));
            pw.printf("print(paste(cor(train$Demanda_uni_equil,train$%s),\"%s\"))\n", varName, " -> " + getColString(selectedColumns, false));
            pw.printf("train$%s <- NULL\n", varName);//cleaning
            pw.printf("%s <- NULL\n", varName);//cleaning
            pw.print("gc()\n");//garbage collecting to prevent out of memory errors
        }
    }

    /**
     * Returns the columns selected by {@code mask}, in {@link #COLUMNS} order.
     * The most significant of the {@code COLUMNS.length} low bits maps to the
     * first column, so mask 1 selects only the last column.
     */
    private static List<String> selectColumns(int mask) {
        List<String> selected = new ArrayList<>();
        for (int i = 0; i < COLUMNS.length; i++) {
            int bit = 1 << (COLUMNS.length - 1 - i);
            if ((mask & bit) != 0) {
                selected.add(COLUMNS[i]);
            }
        }
        return selected;
    }

    /**
     * Joins column names with commas (no spaces).
     *
     * @param selectedCols column names to join
     * @param withQuotes   whether each name is wrapped in double quotes
     * @return the comma-separated list; empty string for an empty list
     */
    public static String getColString(List<String> selectedCols, boolean withQuotes) {
        StringBuilder cols = new StringBuilder();
        for (String col : selectedCols) {
            if (cols.length() > 0) {
                cols.append(',');
            }
            if (withQuotes) {
                cols.append('"').append(col).append('"');
            } else {
                cols.append(col);
            }
        }
        return cols.toString();
    }

    /**
     * Builds the R variable name for a column subset: {@code "mean_"} followed by
     * up to the first three characters of each column name.
     *
     * @param selectedCols column names in the subset
     * @return the variable name; {@code "mean_"} for an empty list
     */
    public static String getMeanVarName(List<String> selectedCols) {
        StringBuilder colName = new StringBuilder("mean_");
        for (String col : selectedCols) {
            // clamp so names shorter than the prefix length do not throw
            colName.append(col, 0, Math.min(NAME_PREFIX_LENGTH, col.length()));
        }
        return colName.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment