Skip to content

Instantly share code, notes, and snippets.

@grigory93
Last active December 16, 2019 17:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save grigory93/33a34a895d5e8c8305db20c8221b1ca6 to your computer and use it in GitHub Desktop.
Save grigory93/33a34a895d5e8c8305db20c8221b1ca6 to your computer and use it in GitHub Desktop.
creating visuals for DAI
# Load dai package
library(dai)
# Connect to Driverless AI instance.
# Connection settings are read from the DAI_URI / DAI_USERNAME / DAI_PASSWORD
# environment variables so real credentials never have to be written into the
# script. When a variable is unset, the original placeholder value is used.
dai_uri = Sys.getenv("DAI_URI", unset = "http://your.instance.name:12345")
usr = Sys.getenv("DAI_USERNAME", unset = "user_id")
pwd = Sys.getenv("DAI_PASSWORD", unset = "password")
dai.connect(uri = dai_uri, username = usr, password = pwd, force_version = FALSE)
# Import data: set import_method to pick ONE of the two methods below.
# switch() evaluates only the selected branch, so the unused method never
# triggers an upload or fails on a missing local file.
#   "upload" - upload a datafile (change path to the location on your client
#              machine)
#   "s3"     - import from S3
import_method = "s3"
titanic_all = switch(
  import_method,
  upload = dai.upload_dataset("data/titanic.csv"),
  s3 = dai.create_dataset("s3://h2o-public-test-data/smalldata/titanic/titanic_expanded.csv"),
  stop("import_method must be \"upload\" or \"s3\"", call. = FALSE)
)
# Split data
# Splits titanic_all into two named datasets using ratio 0.8 and a fixed seed,
# with "survived" passed as the target column.
partitions = dai.split_dataset(
  dataset = titanic_all,
  output_name1 = "titanic_train",
  output_name2 = "titanic_test",
  ratio = 0.8,
  seed = 107030,
  target = "survived"
)
# The result is indexed positionally: first element -> train, second -> test.
titanic_train = partitions[[1]]
titanic_test = partitions[[2]]
# Train a classification experiment on the train split, with the test split
# supplied as the testing frame.
titanic_model = dai.train(
  # Data and target
  training_frame = titanic_train,
  testing_frame = titanic_test,
  target_col = "survived",
  cols_to_drop = c("boat", "body", "home.dest"),
  # Problem type
  is_classification = TRUE,
  is_timeseries = FALSE,
  # Experiment dial settings
  time = 4,
  accuracy = 4,
  interpretability = 5,
  # Run configuration
  experiment_name = "titanic-enh-model-445",
  enable_gpus = TRUE,
  seed = 75252,
  config_overrides = "make_python_scoring_pipeline = 'off'"
)
# Download the experiment's test-set predictions and load them locally.
# force = TRUE is passed through to dai.download_file (presumably allows
# overwriting an existing local copy - confirm against the dai docs).
file_path = dai.download_file(
  titanic_model$test_predictions_path,
  dest_path = "data/titanic_test_predictions.csv",
  force = TRUE
)
preds = read.csv(file_path)
# Score the full dataset (titanic_all, i.e. before the train/test split) with
# the trained model. The listed original columns are requested alongside the
# predictions, and return_df = TRUE asks for a local data frame.
titanic_scored = predict(
  titanic_model,
  newdata = titanic_all,
  include_columns = c("survived", "pclass", "sex", "embarked"),
  return_df = TRUE
)
library(ggplot2)
library(scales)
library(ggthemes)
# Overall density of the survived.No score column (presumably the predicted
# probability of the "No" class - confirm against the predict() output).
# Single grey fill, no grouping; y axis text and ticks are hidden.
ggplot(titanic_scored, aes(x = survived.No)) +
  geom_density(fill = "grey", alpha = 0.7, trim = TRUE) +
  geom_rangeframe() +
  labs(y = NULL) +
  theme_tufte(base_size = 12, base_family = "Palatino") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
# Density of the survived.No score, one semi-transparent curve per value of
# the actual `survived` column, so the two distributions can be compared.
ggplot(titanic_scored, aes(x = survived.No, fill = survived)) +
  geom_density(alpha = 0.5, trim = TRUE) +
  geom_rangeframe() +
  labs(y = NULL) +
  theme_tufte(base_size = 12, base_family = "Palatino") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
# Density of the survived.No score filled by actual `survived`, with one
# stacked panel per `sex`. Each panel gets its own y scale (free_y).
ggplot(titanic_scored, aes(x = survived.No, fill = survived)) +
  geom_density(alpha = 0.7, trim = TRUE) +
  geom_rangeframe() +
  facet_wrap(~sex, ncol = 1, scales = "free_y") +
  labs(y = NULL) +
  theme_tufte(ticks = TRUE) +
  theme(
    legend.position = "bottom",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
# Density of the survived.No score filled by actual `survived`, with one
# stacked panel per `embarked` value. Each panel gets its own y scale (free_y).
#
# Rows without a known `embarked` value are excluded. The mask tests is.na()
# explicitly because a bare `embarked != ""` yields NA for missing values, and
# indexing a data frame with NA produces all-NA rows instead of dropping them.
embarked_known = !is.na(titanic_scored$embarked) & titanic_scored$embarked != ""
ggplot(titanic_scored[embarked_known, ], aes(x = survived.No, fill = survived)) +
  geom_density(alpha = .7, trim = TRUE) +
  facet_wrap(~embarked, ncol = 1, scales = "free_y") +
  labs(y = NULL) +
  theme_tufte(ticks = TRUE) + geom_rangeframe() +
  theme(legend.position = "bottom",
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment