- Make sure you are in the folder that contains the `Dockerfile`.
- If your folder is `/my-docker-image/`, there should be 2 files in it:

      /my-docker-image
      |
      |---Dockerfile
      |---requirements.txt
# library() stops with an error when the package is missing, whereas
# require() only returns FALSE and lets the script fail later.
# NOTE(review): the ggplot2 functions below are presumably attached via
# GGally's dependency on ggplot2 - confirm, or add library(ggplot2).
library(GGally)

# Scatter-plot panel with two smoothers drawn over the points:
# a LOESS fit (red) and a linear-model fit (blue).
#
# Args:
#   data:    data frame handed to ggplot().
#   mapping: aesthetic mapping handed to ggplot().
#   ...:     accepted so callers may pass extra arguments; ignored here.
#
# Returns:
#   The assembled ggplot object.
lm.plt <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point(shape = 20, alpha = 0.7, color = "darkseagreen") +
    geom_smooth(method = loess, fill = "red", color = "red") +
    geom_smooth(method = lm, fill = "blue", color = "blue") +
    theme_minimal()
}
# Packages this project expects to be available.
list.of.packages <- c(
  "ggplot2", "parallel", "tidyverse", "pROC", "caret", "corrplot",
  "doParallel", "dummies", "futile.logger"
)

# Check whether the packages listed are installed or not:
# keep only the names that are absent from the installed-package table.
already.installed <- installed.packages()[, "Package"]
new.packages <- list.of.packages[!(list.of.packages %in% already.installed)]

# If any are missing, install them. The mirror is addressed over HTTPS so
# package downloads are not fetched over an unencrypted connection.
if (length(new.packages) > 0) {
  print("Installing new packages")
  install.packages(new.packages, repos = "https://cran.us.r-project.org")
}
# Runs the package-installation script before anything else.
source("./src/load_package.R")

flog.info("Loading the German Credit Card Dataset")
# Load Dataset
german_credit <- read.table(
  "./assets/data/german.data",
  fileEncoding = "UTF-8",
  dec = ","
)
head(german_credit)

flog.info("Renaming the Columns")
# Column names are assigned positionally. The spelling
# "maintainence_people" is kept as-is because other scripts may refer to
# the column by this exact name.
colnames(german_credit) <- c(
  "status", "duration", "credit_history", "purpose", "credit_amount",
  "savings_account", "employment", "installment_rate", "status_sex",
  "guarantors", "residence", "property", "age", "other_installment",
  "housing", "existing_credits", "job", "maintainence_people",
  "telephone", "foreign", "rating"
)
# Runs the exploratory-analysis script first.
source("./src/eda.R")

# Importing the intermediate data
flog.info("Loading the intermediate data")
german_credit <- readRDS(
  "./assets/intermediate-files/intermediate_german_data.rds"
)

# Recode the rating column: the value 1 becomes "good", anything else "bad".
german_credit$rating <- ifelse(german_credit$rating == 1, "good", "bad")

# Checking for missing values
# unlist(lapply(german_credit, function(x) sum(is.na(x))))
#setwd("~/difference-engine/docker-for-data-science-r/")
# Runs the feature-engineering script first.
source("./src/fe-train.R")

# Fixed seed so that random steps are reproducible.
set.seed(42)

# Parallelizing the modelling
# NOTE: Try not to use all the cores
# Two cores are left free, but the worker count is clamped to at least 1:
# detectCores() - 2 is 0 or negative on machines with two or fewer cores,
# and detectCores() may return NA when the count cannot be determined.
doParallel::registerDoParallel(
  max(1L, parallel::detectCores() - 2L, na.rm = TRUE)
)

# Write the ML Code here
FROM rocker/tidyverse

# Refresh the package index, upgrade, and install the system libraries
# listed on the continuation line.
RUN apt-get update && apt-get -y upgrade && apt-get install -y \
    build-essential libssl-dev libffi-dev libxml2-dev libcurl4-openssl-dev

# Directories exposed as volumes below.
RUN mkdir /home/rstudio/data /home/rstudio/models

# The JSON-array form of VOLUME requires double quotes; with single quotes
# the line is not parsed as an array and the bracketed text is taken
# literally as part of the paths.
VOLUME ["/home/rstudio/data", "/home/rstudio/models"]

# NOTE(review): this installs 'dummy', while the project's R scripts list
# 'dummies' - confirm which package is intended.
RUN Rscript -e "install.packages(c('dummy', 'corrplot', 'pROC'), dependencies=TRUE)"
FROM rocker/r-base

# Refresh the package index, upgrade, and install the system libraries
# listed on the continuation line.
RUN apt-get update && apt-get -y upgrade && apt-get install -y \
    build-essential libssl-dev libffi-dev libxml2-dev libcurl4-openssl-dev

# R packages used by the project's scripts.
RUN Rscript -e "install.packages(c('caret', 'tidyverse', 'gbm', 'pROC', 'corrplot', 'doParallel', 'dummies', 'futile.logger'), dependencies=TRUE)"

# Directory for the project inside the image. The key=value form replaces
# the legacy space-separated ENV syntax.
ENV INSTALL_PATH=/germancc
RUN mkdir -p "$INSTALL_PATH"
def timeit(method): | |
def timing(*args, **kwargs): | |
timings = [] | |
print("Running this 1000 loops, for benchmarking") | |
for i in range(1000): | |
start = time.time() | |
result = method(*args, **kwargs) | |
end = time.time() | |