Created
August 21, 2016 07:09
-
-
Save chathurawidanage/eac0b346ecd0842171d29ab723063d59 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.*; | |
import java.util.ArrayList; | |
import java.util.List; | |
/** | |
* @author Chathura Widanage | |
*/ | |
/**
 * Generates an R script that, for every non-empty combination of the grouping
 * columns, computes the mean of {@code Demanda_uni_equil} per group, merges that
 * mean back onto the training table, and prints its correlation with
 * {@code Demanda_uni_equil}.
 */
public class GenerateR {

    /**
     * Path of the training CSV as embedded in the generated R string literal.
     * Each separator is written with 4 backward slashes in Java so that the
     * generated script contains 2, which R then reads as a single backslash.
     */
    private static final String PATH_TO_TRAIN_CSV =
            "F:\\\\engineering\\\\Semester 7_8\\\\ML\\\\keggle\\\\train.csv";

    /** Location where the generated R script is saved. */
    private static final String R_SCRIPT_PATH = "D:\\cor.R";

    /** Grouping columns whose combinations are evaluated. */
    private static final String[] COLUMNS =
            {"Cliente_ID", "Producto_ID", "Agencia_ID", "Ruta_SAK", "Canal_ID"};

    /** Number of leading characters of each column name used in a variable name. */
    private static final int PREFIX_LENGTH = 3;

    /**
     * Writes the R script to {@link #R_SCRIPT_PATH}.
     *
     * @param args ignored
     * @throws IOException if the script file cannot be opened or fully written
     */
    public static void main(String[] args) throws IOException {
        File rScript = new File(R_SCRIPT_PATH);
        // try-with-resources closes the writer even if generation fails midway.
        try (PrintWriter pw = new PrintWriter(new FileWriter(rScript))) {
            pw.println("library(data.table)");
            pw.println("print('Reading data')");
            pw.printf("train <-fread('%s',select = c('Cliente_ID', 'Producto_ID', 'Agencia_ID', 'Ruta_SAK', 'Demanda_uni_equil','Canal_ID'))\n",
                    PATH_TO_TRAIN_CSV);

            // Every non-zero bit mask over the columns is one non-empty combination.
            int subsetCount = 1 << COLUMNS.length;
            for (int mask = 1; mask < subsetCount; mask++) {
                writeCorrelationStep(pw, selectColumns(mask));
            }

            pw.flush();
            // PrintWriter swallows IOExceptions; surface them instead of
            // silently leaving a truncated script behind.
            if (pw.checkError()) {
                throw new IOException("Failed to write R script to " + rScript);
            }
        }
    }

    /**
     * Returns the columns selected by {@code mask}. The most significant of the
     * {@code COLUMNS.length} low bits corresponds to the first column.
     */
    private static List<String> selectColumns(int mask) {
        List<String> selected = new ArrayList<>();
        for (int i = 0; i < COLUMNS.length; i++) {
            int bit = COLUMNS.length - 1 - i;
            if (((mask >> bit) & 1) == 1) {
                selected.add(COLUMNS[i]);
            }
        }
        return selected;
    }

    /** Emits the R statements that evaluate one combination of grouping columns. */
    private static void writeCorrelationStep(PrintWriter pw, List<String> selectedColumns) {
        String varName = getMeanVarName(selectedColumns);
        String bareCols = getColString(selectedColumns, false);
        String quotedCols = getColString(selectedColumns, true);

        pw.printf("%s <- train[, .(%s = mean(Demanda_uni_equil)), by = .(%s)]\n",
                varName, varName, bareCols);
        pw.printf("train <- merge(train, %s, all.x = TRUE, by = c(%s))\n",
                varName, quotedCols);
        pw.printf("print(paste(cor(train$Demanda_uni_equil,train$%s),\"%s\"))\n",
                varName, " -> " + bareCols);
        pw.printf("train$%s <- NULL\n", varName); // drop the merged column again
        pw.printf("%s <- NULL\n", varName); // drop the per-group mean table
        pw.printf("gc()\n"); // garbage collecting to prevent out of memory errors
    }

    /**
     * Joins column names with commas (no spaces).
     *
     * @param selectedCols column names to join
     * @param withQuotes   whether each name is wrapped in double quotes
     * @return the comma-separated list; empty string for an empty list
     */
    public static String getColString(List<String> selectedCols, boolean withQuotes) {
        StringBuilder cols = new StringBuilder();
        for (String col : selectedCols) {
            if (cols.length() > 0) {
                cols.append(',');
            }
            if (withQuotes) {
                cols.append('"').append(col).append('"');
            } else {
                cols.append(col);
            }
        }
        return cols.toString();
    }

    /**
     * Builds the variable name {@code "mean_"} followed by the first three
     * characters of every column name. Names shorter than three characters are
     * used in full instead of causing an exception.
     *
     * @param selectedCols column names the variable is derived from
     * @return the variable name; {@code "mean_"} for an empty list
     */
    public static String getMeanVarName(List<String> selectedCols) {
        StringBuilder name = new StringBuilder("mean_");
        for (String col : selectedCols) {
            name.append(col, 0, Math.min(PREFIX_LENGTH, col.length()));
        }
        return name.toString();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment