This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Basic dataset usage
;; Refer the Incanter namespaces used throughout this walkthrough.
(use '(incanter core stats charts io))
;; load a dataset from a URL pointing to the data.
;; `:header true` is passed because the CSV starts with a header row;
;; the columns are referred to as :speed and :dist below.
(def data
  (read-dataset
    "http://github.com/liebke/incanter/raw/master/data/cars.csv"
    :header true))
;; the dataset could have been loaded with the incanter.datasets/get-dataset function
(use 'incanter.datasets)
(incanter.datasets/get-dataset :cars)
;; view the dimensions of the dataset
(dim data)

;; view the column names
(col-names data)

;; view the cars dataset with an extra leading column holding
;; (range (nrow $data)); $data is the dataset bound by with-data
(with-data (get-dataset :cars)
  (view (conj-cols (range (nrow $data)) $data)))
;; fit a linear model of distance on speed; `lm` is defined at the top
;; level (rather than inside the plotting form) because later forms
;; read its :fitted and :residuals entries
(def lm
  (with-data data
    (linear-model ($ :dist) ($ :speed))))

;; plot a scatter plot of speed vs. distance and add a regression line
(with-data data
  (doto (scatter-plot ($ :speed) ($ :dist))
    (add-lines ($ :speed) (:fitted lm))
    view))
;; create a new dataset that includes the orig data and the
;; fitted values from the linear-model function
(def results (conj-cols data (:fitted lm)))

;; give the new dataset meaningful column names
;; (the rename is applied to `results`, which carries the added fitted
;; column; `data` only has the two original columns)
(def results (col-names results [:speed :dist :predicted-dist]))

;; do both steps at once with the -> macro, and also add the residuals to the new dataset
(def results
  (-> (conj-cols data (:fitted lm) (:residuals lm))
      (col-names [:speed :dist :predicted :residuals])))
;; now use the $where function

;; rows whose :speed equals 10
($where {:speed 10} results)

;; rows whose :speed is greater than 10 and less than 20
($where {:speed {:$gt 10 :$lt 20}} results)

;; rows whose :speed is one of the listed values
($where {:speed {:$in #{4 7 24 25}}} results)

;; rows whose :speed is none of the listed values
($where {:speed {:$nin #{4 7 24 25}}} results)

;; mean speed over the rows whose residual lies between -10 and 10;
;; inside with-data, $where is called without an explicit dataset
(with-data results
  (mean ($ :speed ($where {:residuals {:$gt -10 :$lt 10}}))))

;; stack the rows with :speed below 10 on top of the rows with :speed above 20
(with-data results
  (conj-rows ($where {:speed {:$lt 10}})
             ($where {:speed {:$gt 20}})))

;; the same selection expressed as a predicate function over each row
(with-data results
  ($where (fn [row]
            (or (< (:speed row) 10)
                (> (:speed row) 20)))))
;; Now let's use MongoDB
(use 'somnium.congomongo)
(use 'incanter.mongodb)

;; connect to a MongoDB server running on the localhost on the default port.
(mongo! :db "mydb")
;; store the results dataset under the :breaking-dists key
(insert-dataset :breaking-dists results)

;; read it back and inspect what was stored
(def breaking-dists (fetch-dataset :breaking-dists))
(col-names breaking-dists)
(view breaking-dists)

;; NOTE(review): this writes the fetched rows back under the same key that
;; already holds them -- confirm whether a second copy of the rows is intended.
(insert-dataset :breaking-dists breaking-dists)
(view (fetch-dataset :breaking-dists))
;; use fetch-dataset's :where option to retrieve only the
;; rows where the speed is between 10 and 20 mph, and
;; then calculate the mean braking distance.
(with-data (fetch-dataset :breaking-dists
                          :where {:speed {:$gt 10 :$lt 20}})
  (mean ($ :dist)))
;; print the documentation for the incanter.mongodb namespace
(doc incanter.mongodb)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment