Skip to content

Instantly share code, notes, and snippets.

@piotrpiatyszek
Last active October 19, 2020 02:22
Show Gist options
  • Save piotrpiatyszek/fb6670fa45ed5fbb34c2785fdbe4b340 to your computer and use it in GitHub Desktop.
Save piotrpiatyszek/fb6670fa45ed5fbb34c2785fdbe4b340 to your computer and use it in GitHub Desktop.
library(dplyr)
library(DALEX)
library(skimr)
library(forcats)
library(randomNames)
# Load the raw credit-scoring table from the working directory.
raw <- read.csv("./CreditScoring.csv")

# Clean the raw table and turn the coded columns into labelled factors.
# The result name is on the left so the target of the pipeline is visible
# up front (the original used a trailing `->`).
df <- raw %>%
  # Code 0 has no label in the recodings below, so drop those rows first.
  filter(Status != 0, Home != 0, Marital != 0, Job != 0) %>%
  # Keep only rows in which no column equals 99999999
  # (presumably the dataset's missing-value sentinel -- TODO confirm).
  filter(if_all(everything(), ~ .x != 99999999)) %>%
  mutate(
    # fct_rev() reverses the level order; with only codes 1 and 2 left this
    # puts "negative" first and "positive" last.
    Status = fct_rev(
      fct_recode(as.factor(Status), negative = "2", positive = "1")
    ),
    Home = fct_recode(
      as.factor(Home),
      rent = "1",
      notarial_act = "2",
      private_contract = "3",
      ignoring_contract = "4",
      parents = "5",
      other = "6"
    ),
    Marital = fct_recode(
      as.factor(Marital),
      single = "1",
      married = "2",
      widow = "3",
      separated = "4",
      divorced = "5"
    ),
    Records = fct_recode(as.factor(Records), no = "1", yes = "2"),
    Job = fct_recode(
      as.factor(Job),
      fixed = "1",
      partime = "2",
      freelance = "3",
      other = "4"
    )
  )
# Fix the RNG so the generated names and the train/test split are reproducible.
set.seed(1313)
n_obs <- nrow(df)

# Generate three times as many names as rows, then de-duplicate, so that
# enough distinct names remain to label every row.
rnd_names <- randomNames(
  n = n_obs * 3,
  name.order = "first.last",
  name.sep = " "
) %>%
  unique()

# Row names must be unique and non-missing; fail with a clear message if
# de-duplication left too few names instead of an opaque row.names error.
if (length(rnd_names) < n_obs) {
  stop(
    "randomNames() produced only ", length(rnd_names),
    " unique names for ", n_obs, " rows",
    call. = FALSE
  )
}
rownames(df) <- rnd_names[seq_len(n_obs)]

# 60% of the rows go to training, the remainder to testing.
# seq_len() is used instead of 1:n so an empty table cannot yield c(1, 0).
train_index <- sample(seq_len(n_obs), size = round(0.6 * n_obs))
train <- df[train_index, ]
test <- df[-train_index, ]
library(arenar)
library(randomForest)
# Fit a random forest predicting Status from every other column of `train`.
model <- randomForest(Status ~ ., data = train)

# Drop the target by name rather than by position, so the explainer's data
# stays correct even if Status is not the first column. drop = FALSE keeps
# the result a data frame.
predictors <- setdiff(names(test), "Status")

# Wrap the model in a DALEX explainer evaluated on the held-out rows;
# y is TRUE where the observed Status is "positive".
explainer <- DALEX::explain(
  model,
  data = test[, predictors, drop = FALSE],
  y = test$Status == "positive"
)

# Print performance measures for the explainer on the test data.
model_performance(explainer)
# Measures for: classification
# recall : 0.9128728
# precision : 0.8161404
# f1 : 0.8618007
# accuracy : 0.7868571
# auc : 0.8287271
#
# Residuals:
# 0% 10% 20% 30% 40% 50% 60% 70% 80%
# -0.9920 -0.6122 -0.3640 0.0080 0.0340 0.0660 0.1120 0.1840 0.2800
# 90% 100%
# 0.4300 0.8640
# Build a live Arena, register the explainer and the held-out observations.
arena <- create_arena(live = TRUE) %>%
  push_model(explainer) %>%
  push_observations(test)

# `changed` is not created anywhere in this script; referencing it
# unconditionally aborts the pipeline before the server starts. Push it only
# when it has been defined beforehand (e.g. interactively).
if (exists("changed")) {
  arena <- arena %>% push_observations(changed)
}

# Serve the Arena on port 9763.
run_server(arena, port = 9763)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment