Skip to content

Instantly share code, notes, and snippets.

@Zoldin
Zoldin / evaluate.r
Last active July 21, 2017 21:03
evaluate.R
#!/usr/bin/Rscript
library(Matrix)
library(glmnet)
# Read the command-line arguments that follow the script name. Exactly three
# are expected: the file name where the model is stored, the test file, and
# the file name for the AUC output.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 3) {
  # The message previously ended in a stray literal "n"; stop() already ends
  # the printed error with a newline, so none is needed in the text.
  stop(
    "Three arguments must be supplied ( file name where model is stored ",
    "(RDataname), test file (.txt, matrix) and file name for AUC output).",
    call. = FALSE
  )
}
@Zoldin
Zoldin / featurization.R
Last active July 21, 2017 20:59
featurization.R
#!/usr/bin/Rscript
library(text2vec)
library(MASS)
library(Matrix)
# Read the command-line arguments that follow the script name. Exactly four
# are expected: the train file, the test data set, the train output file name
# and the test output file name.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 4) {
  # The message previously ended in a stray literal "n"; stop() already ends
  # the printed error with a newline, so none is needed in the text.
  stop(
    "Four arguments must be supplied ( train file (csv format) ,test data set ",
    "(csv format), train output file name and test output file name - ",
    "txt files ).",
    call. = FALSE
  )
}
@Zoldin
Zoldin / parsingxml.R
Last active July 21, 2017 20:53
parsingxml.R
#!/usr/bin/Rscript
library(XML)
# Read the command-line arguments that follow the script name. Exactly two
# are expected: the input file name and the output file name.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 2) {
  # The message previously ended in a stray literal "n"; stop() already ends
  # the printed error with a newline, so none is needed in the text.
  stop(
    "Two arguments must be supplied ",
    "(input file name ,output file name - csv ext).",
    call. = FALSE
  )
}
#read XML line by line
@Zoldin
Zoldin / requirements.R
Created July 21, 2017 20:41
requirements.R
#!/usr/bin/Rscript
# Install each required package in turn, in the order listed.
required_packages <- c("XML", "caret", "text2vec", "MASS", "Matrix", "glmnet")
for (pkg in required_packages) {
  install.packages(pkg)
}
@Zoldin
Zoldin / train_model.R
Last active July 21, 2017 21:02
train_model.R
#!/usr/bin/Rscript
library(Matrix)
library(glmnet)
# three arguments need to be provided - train file (.txt, matrix), seed and output name for RData file
args = commandArgs(trailingOnly=TRUE)
if (!length(args)==3) {
stop("Three arguments must be supplied ( train file (.txt, matrix), seed and argument for RData model name).n", call.=FALSE)
@Zoldin
Zoldin / train_test_splitting.R
Created July 21, 2017 20:42
train_test_splitting.R
#!/usr/bin/Rscript
library(caret)
# Read the command-line arguments that follow the script name. Exactly five
# are expected: the input file name, the splitting ratio for the test data
# set, the seed, the train output file name and the test output file name.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 5) {
  # The message previously ended in a stray literal "n"; stop() already ends
  # the printed error with a newline, so none is needed in the text.
  stop(
    "Five arguments must be supplied (input file name, splitting ratio ",
    "related to test data set, seed, train output file name, ",
    "test output file name).",
    call. = FALSE
  )
}
@Zoldin
Zoldin / train_model_Python
Created August 2, 2017 06:52
train_model_Python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import sys
try: import cPickle as pickle # python2
except: import pickle # python3
from scipy import sparse
from numpy import loadtxt
if len(sys.argv) != 4:
@Zoldin
Zoldin / train_model_Python.py
Last active August 19, 2017 17:01
train_model_Python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import sys
try: import cPickle as pickle # python2
except: import pickle # python3
from scipy import sparse
from numpy import loadtxt
import feather as ft
if len(sys.argv) != 4:
@Zoldin
Zoldin / evaluation_python_model.py
Last active August 19, 2017 17:00
Evaluation_python_model
from sklearn.metrics import precision_recall_curve
import sys
import sklearn.metrics as metrics
from scipy import sparse
from numpy import loadtxt
try: import cPickle as pickle # python2
except: import pickle # python3
import feather as ft
if len(sys.argv) != 4:
Let's prepare the data with Python:
```{python data_load}
import pandas as pd
# Read the CSV and rename the auto-generated "Unnamed: 0" column to
# "datetime" in a single chained expression, then preview the first rows.
data = pd.read_csv("15m.csv").rename(columns={"Unnamed: 0": "datetime"})
data.head()
```