Skip to content

Instantly share code, notes, and snippets.

@bakyeono
Created May 2, 2015 07:41
Show Gist options
  • Save bakyeono/5d8b1e8058d5e5bb672c to your computer and use it in GitHub Desktop.
Save bakyeono/5d8b1e8058d5e5bb672c to your computer and use it in GitHub Desktop.
Clojure Data Analysis snippets
(ns da
(:use [clojure repl pprint])
(:require [clojure string xml zip])
(:require [clojure.data json])
(:require [clojure.java jdbc])
(:require [net.cgrand enlive-html])
(:require [incanter core io excel])
(:import [java.net URL]))
;;; utility
(defn main
  "Placeholder entry point. Takes no arguments, performs no work, and
  returns nil."
  [])
(defn reload
  "Re-requires the `da` namespace with the :reload flag, so that edits to
  its source are picked up from a running REPL."
  []
  ;; A one-element vector libspec is equivalent to the bare lib symbol.
  (require '[da] :reload))
(defn reload-all
  "Re-requires the `da` namespace with the :reload-all flag, which also
  reloads the libraries it depends on."
  []
  ;; A one-element vector libspec is equivalent to the bare lib symbol.
  (require '[da] :reload-all))
;;; example: reading data
(defn read-csv
  "Example: reads the file data/small-sample.csv with
  incanter.io/read-dataset using its default options and returns the
  result."
  []
  (let [csv-path "data/small-sample.csv"]
    (incanter.io/read-dataset csv-path)))
(defn read-csv-with-header
  "Example: reads the file data/small-sample-header.csv with
  incanter.io/read-dataset, passing `:header true`, and returns the
  result."
  []
  (let [csv-path "data/small-sample-header.csv"]
    (incanter.io/read-dataset csv-path :header true)))
(defn read-json
  "Example: slurps data/small-sample.json, parses the text with
  clojure.data.json/read-json, and hands the parsed value to
  incanter.core/to-dataset."
  []
  (let [json-text (slurp "data/small-sample.json")
        parsed    (clojure.data.json/read-json json-text)]
    (incanter.core/to-dataset parsed)))
(defn read-xls
  "Example: reads the spreadsheet data/small-sample-header.xls with
  incanter.excel/read-xls using its default options and returns the
  result."
  []
  (let [xls-path "data/small-sample-header.xls"]
    (incanter.excel/read-xls xls-path)))
(defn read-jdbc
  "Example: opens the SQLite database data/small-sample.sqlite, selects
  every row of the `people` table, and converts the rows to a dataset
  with incanter.core/to-dataset."
  []
  (let [db-spec {:subprotocol "sqlite"
                 :subname     "data/small-sample.sqlite"
                 :classname   "org.sqlite.JDBC"}
        table   'people
        query   (str "SELECT * FROM " table ";")]
    (clojure.java.jdbc/with-connection db-spec
      (clojure.java.jdbc/with-query-results rows [query]
        ;; doall realizes the rows inside the with-query-results body,
        ;; before they are handed to to-dataset.
        (-> rows
            doall
            incanter.core/to-dataset)))))
(defn read-xml
  "Example: parses data/small-sample.xml and builds a dataset from it.

  Each child element of the document root is treated as one record; each
  child of a record contributes one field whose key is the element's tag
  and whose value is the first item of the element's content."
  []
  (let [;; 1. Parse the XML data file and wrap it in a zipper.
        root-loc    (clojure.zip/xml-zip
                     (clojure.xml/parse "data/small-sample.xml"))
        ;; 2. Walk right from the root's first child; the walk yields nil
        ;;    once it moves past the last sibling.
        record-locs (take-while (complement nil?)
                                (iterate clojure.zip/right
                                         (clojure.zip/down root-loc)))
        ;; 3. Turn one record location into an ordered tag -> value map.
        field-pair  (fn [field]
                      [(:tag field) (first (:content field))])
        loc->record (fn [loc]
                      (apply array-map
                             (mapcat field-pair (clojure.zip/children loc))))]
    ;; 4. Finally convert the sequence of maps into an Incanter dataset.
    (incanter.core/to-dataset (map loc->record record-locs))))
(defn str->kwd
  "Takes a string and returns a normalized keyword.

  The text is trimmed, lower-cased, and every run of whitespace is
  replaced by a single hyphen, so `Given Name` becomes :given-name.
  Throws if `s` is nil."
  [s] ; named `s` so that clojure.core/str is not shadowed
  (-> s
      clojure.string/trim
      clojure.string/lower-case
      ;; Collapse whitespace runs so padded or multi-space text does not
      ;; produce leading, trailing, or doubled hyphens.
      (clojure.string/replace #"\s+" "-")
      keyword))
(defn read-html-table
  "Example: downloads small-sample-table.html, selects the table with id
  `data`, and builds a dataset from it.

  Column names are the texts of the `th` cells, converted to keywords
  with str->kwd. Each row is the texts of a `tr`'s `td` cells; rows with
  no `td` cells are dropped."
  []
  (let [page         (net.cgrand.enlive-html/html-resource
                      (java.net.URL.
                       (str "http://"
                            "www.ericrochester.com"
                            "/clj-data-analysis/data/small-sample-table.html")))
        data-table   (net.cgrand.enlive-html/select page [:table#data])
        header-cells (net.cgrand.enlive-html/select data-table [:tr :th])
        column-names (mapv (comp str->kwd net.cgrand.enlive-html/text)
                           header-cells)
        cell-texts   (fn [tr]
                       (map net.cgrand.enlive-html/text
                            (net.cgrand.enlive-html/select tr [:td])))
        table-rows   (net.cgrand.enlive-html/select data-table [:tr])
        ;; `filter seq` discards rows that yielded no `td` text.
        body-rows    (filter seq (map cell-texts table-rows))]
    (incanter.core/dataset column-names body-rows)))
(defn get-family
  "Takes an article element and returns the family name: the joined
  text of every `h2` found under a `header` inside the article."
  [article]
  (->> (net.cgrand.enlive-html/select article [:header :h2])
       (map net.cgrand.enlive-html/text)
       clojure.string/join))
(defn get-person
  "Takes a list item and returns a map of the person's name and
  relationship.

  The first child of `li` is expected to be an element whose content
  holds the name; the second child is expected to be the relationship
  text. Returns {:name <string> :relationship <string or nil>};
  :relationship is nil when the second child is missing or is not a
  string."
  [li]
  (let [[name-el rel] (:content li)]
    {:name         (apply str (:content name-el))
     ;; Trimming a nil or non-string second child would throw and abort
     ;; the whole page read, so only strings are trimmed.
     :relationship (when (string? rel)
                     (clojure.string/trim rel))}))
(defn get-rows
  "Takes an article and returns a lazy sequence with one person map per
  `li` found under a `ul` in the article, each with the article's family
  name added under :family."
  [article]
  (let [family-name (get-family article)
        items       (net.cgrand.enlive-html/select article [:ul :li])]
    (for [item items]
      (assoc (get-person item) :family family-name))))
(defn read-html-list
  "Example: downloads small-sample-list.html, selects every `article`
  element, and builds a dataset from the person rows that get-rows
  extracts from each article."
  []
  (let [source   (java.net.URL.
                  (str "http://"
                       "www.ericrochester.com"
                       "/clj-data-analysis/data/small-sample-list.html"))
        articles (-> source
                     net.cgrand.enlive-html/html-resource
                     (net.cgrand.enlive-html/select [:article]))]
    (->> articles
         (mapcat get-rows)
         incanter.core/to-dataset)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment