Skip to content

Instantly share code, notes, and snippets.

@behrica
Last active February 14, 2021 22:48
Show Gist options
  • Save behrica/6d5520a7f41510fc38f640bd353f4383 to your computer and use it in GitHub Desktop.
Save behrica/6d5520a7f41510fc38f640bd353f4383 to your computer and use it in GitHub Desktop.
(ns sciloj.titanic
(:require [scicloj.metamorph.core :as morph]
[tablecloth.pipeline :as tc-mm]
[tablecloth.api :as tc]
[tech.v3.dataset.metamorph :as ds-mm]
[tech.v3.dataset :as ds]
[tech.v3.dataset.modelling :as ds-mod]
[tech.v3.ml.metamorph :as ml-mm]
[tech.v3.ml :as ml]
[tech.v3.ml.classification :as ml-class]
[tech.v3.ml.metrics :as ml-metrics]
[tech.v3.ml.loss :as ml-loss]
[tech.v3.libs.smile.classification]
[tech.v3.dataset.column-filters :as cf]
[tech.v3.datatype.functional :as dfn]
[tech.v3.datatype.argops :as argops]
[tech.v3.ml.loss :as loss]
[notespace.api :as note]
[notespace.kinds :as kind ]
[camel-snake-kebab.core :as csk]
[sciloj.evaluate :as ml-eval]
[clojure.string :as str]
))
;; Wrapped in `comment`, so this call is not evaluated when the file loads;
;; evaluate the inner form by hand from the REPL when needed.
(comment (note/init-with-browser))

["# Read data"]

;; Load the training CSV. Column names are passed through
;; csk/->kebab-case-keyword via the :key-fn option.
(def data
  (-> "data/titanic/train.csv"
      (tc/dataset {:key-fn csk/->kebab-case-keyword})))

;; Quick inspection of what was loaded.
(-> data tc/column-names)
(-> data ds/brief)
(defn evaluate
  "Calls `ml-eval/train-k-fold` with the top-level `data`, the given
  `pipeline-fn`, `loss/classification-loss` and 10, and returns the
  `:avg-loss` entry of its result."
  [pipeline-fn]
  (let [k-fold-result (ml-eval/train-k-fold data
                                            pipeline-fn
                                            loss/classification-loss
                                            10)]
    (:avg-loss k-fold-result)))
["# First pipeline using only :sex for prediction"]

;; Keep :survived and :sex, run both through categorical->number, mark
;; :survived as the inference target and fit a logistic regression.
(let [sex-only-pipeline
      (morph/pipeline
       (tc-mm/select-columns [:survived :sex])
       (ds-mm/categorical->number [:sex :survived])
       (ds-mm/set-inference-target :survived)
       (ml-mm/model {:model-type :smile.classification/logistic-regression}))]
  (evaluate sex-only-pipeline))
["# :sex and :pclass for prediction"]

;; Same steps as the :sex-only pipeline, with :pclass added as a second
;; feature column. :survived remains the inference target.
(evaluate
 (morph/pipeline
  (tc-mm/select-columns [:survived :sex :pclass])
  (ds-mm/categorical->number [:survived :sex :pclass])
  (ds-mm/set-inference-target :survived)
  (ml-mm/model {:model-type :smile.classification/logistic-regression})))
["# :pclass for prediction"]

;; Only :pclass is kept as a feature; :survived is the inference target.
(let [pclass-only-pipeline
      (morph/pipeline
       (tc-mm/select-columns [:survived :pclass])
       (ds-mm/categorical->number [:survived :pclass])
       (ds-mm/set-inference-target :survived)
       (ml-mm/model {:model-type :smile.classification/logistic-regression}))]
  (evaluate pclass-only-pipeline))
["Use Pclass, Sex, SibSp, Parch, for prediction"]

;; Four feature columns (:pclass :sex :sib-sp :parch) plus the :survived
;; target; every selected column goes through categorical->number.
(let [family-features-pipeline
      (morph/pipeline
       (tc-mm/select-columns [:survived :pclass :sex :sib-sp :parch])
       (ds-mm/categorical->number [:survived :pclass :sex :sib-sp :parch])
       (ds-mm/set-inference-target :survived)
       (ml-mm/model {:model-type :smile.classification/logistic-regression}))]
  (evaluate family-features-pipeline))
["Use Pclass, Sex, SibSp, Parch and title for prediction"]

(defn name->title
  "Returns `data` with a :title column added (or replaced). Each title is
  computed from the matching value of the :name column: take the text before
  the first period, then the part after the last comma of that text, and
  trim surrounding whitespace."
  [data]
  (letfn [(title-of [full-name]
            (let [before-period (first (str/split full-name #"\."))
                  after-comma   (last (str/split before-period #"\,"))]
              (str/trim after-comma)))]
    (tc/add-or-replace-column data
                              :title
                              (map title-of (data :name)))))
;; name->title reads the :name column, so it has to run while :name is still
;; part of the dataset. The original order selected columns first, which
;; dropped :name and left nothing to derive :title from. Here the title is
;; added first, and only then are the columns narrowed to the model inputs
;; (:title included, :name excluded).
(evaluate
 (morph/pipeline
  (morph/lift name->title)
  (tc-mm/select-columns [:survived :pclass :sex :sib-sp :parch :title])
  (ds-mm/categorical->number [:survived :pclass :sex :sib-sp :parch :title])
  (ds-mm/set-inference-target :survived)
  (ml-mm/model {:model-type :smile.classification/logistic-regression})))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment