Create a gist now

Instantly share code, notes, and snippets.

QSTK Tutorial 1 in Clojure
(ns qstk.tutorial1
  (:use [clj-time.core :exclude 'extend])
  (:use clj-time.format)
  (:use clj-time.coerce)
  (:require incanter.io)
  (:require incanter.core)
  (:require incanter.charts))
;; Value of the QS environment variable, captured once when this namespace
;; loads (nil when the variable is unset). It is used below as the root
;; directory for the QSTK data files. Declared dynamic so it can be rebound
;; with `binding`.
(def ^:dynamic *QS* (System/getenv "QS"))
(defn get-NYSE-days
  "Returns the NYSE trading timestamps that fall between `start-date` and
  `end-date`, both bounds inclusive.

  Trading days are read from `<QS>/qstkutil/NYSE_dates.txt`; the first column
  of that file is parsed as MM/dd/yyyy. `time-of-day` (a clj-time period such
  as `(hours 16)`) is added to every parsed day before it is compared against
  the bounds.

  The result is a sorted set, so iterating it yields the timestamps in
  chronological order."
  [start-date end-date time-of-day]
  (let [dates-file (str *QS* "/qstkutil/NYSE_dates.txt")
        NYSE-dates (incanter.io/read-dataset dates-file)
        fmt        (formatter "MM/dd/yyyy")
        ;; Explicit comparisons keep both ends inclusive, as documented; a
        ;; Joda interval would treat the end instant as exclusive.
        in-range?  (fn [d]
                     (not (or (before? d start-date)
                              (after? d end-date))))]
    ;; A sorted set keeps the days in time order (Joda DateTime is
    ;; Comparable). A plain hash set would hand them back in arbitrary order,
    ;; which breaks every row-order-dependent computation built on top of it.
    (->> (incanter.core/$map #(parse fmt %) :col0 NYSE-dates)
         (map #(plus % time-of-day))
         (filter in-range?)
         (into (sorted-set)))))
(defn read-symbols-data
  "Reads `<QS>/QSData/<source-in>/<symbol>.csv` for every entry of `symbols`
  (treating the first line of each file as a header) and returns a map of
  symbol -> incanter dataset."
  [source-in symbols]
  (let [csv-path (fn [sym]
                   (str *QS* "/QSData/" source-in "/" sym ".csv"))]
    (into {}
          (for [sym symbols]
            [sym (incanter.io/read-dataset (csv-path sym) :header true)]))))
(defn select-value
  "Looks up the entry of `ds` stored under the key `{:Date date}` and returns
  the value of `column` in the first row of that entry.

  `ds` is called as a function of the key, so it is expected to be a map such
  as the one produced by `incanter.core/$group-by :Date`. Returns nil when no
  entry exists for `date`."
  [ds column date]
  (let [group-key {:Date date}
        match     (ds group-key)]
    (if (nil? match)
      nil
      (incanter.core/$ 0 column match))))
(defn get-data
  "Builds one row (a map) per entry of `timestamps`. Each row holds the
  timestamp as epoch milliseconds under :Date plus, for every symbol in
  `symbols-data`, the value of `column` on that day (nil when the symbol has
  no row for the day):

  :Date AAPL GOOG WFC
  2012-02-01 54.1 33.1 25.0
  2012-02-02 56.3 33.4 22.9

  `symbols-data` is a map of symbol -> dataset whose :Date column holds
  yyyy-MM-dd strings, since timestamps are formatted that way before the
  lookup. The `symbols` and `time-of-day` arguments are accepted but not
  used. The returned sequence is lazy."
  [timestamps symbols column symbols-data time-of-day]
  (let [by-date (reduce (fn [acc [sym ds]]
                          (assoc acc sym (incanter.core/$group-by :Date ds)))
                        {}
                        symbols-data)
        day-fmt (formatters :year-month-day)
        row-for (fn [t]
                  (let [day (unparse day-fmt t)]
                    (reduce (fn [row [sym groups]]
                              (assoc row sym (select-value groups column day)))
                            {:Date (to-long t)}
                            by-date)))]
    (map row-for timestamps)))
(defn multi-series-chart
  "Builds a time-series plot of `data` with :Date on the x axis and one line
  per column named in `series`.

  Takes a single map with keys :series (column names; the first one seeds the
  plot, the rest are added as extra lines), :title, :x-label, :y-label and
  :data (the dataset to plot). Every line is labelled with its column name
  and the legend is enabled. Returns the chart."
  [{:keys [series title x-label y-label data]}]
  (let [base-plot  (incanter.charts/time-series-plot :Date (first series)
                                                      :data data
                                                      :legend true
                                                      :series-label (first series)
                                                      :title title
                                                      :y-label y-label
                                                      :x-label x-label)
        add-series (fn [plot col]
                     (incanter.charts/add-lines plot :Date col
                                                :series-label col
                                                :data data))]
    (reduce add-series base-plot (rest series))))
(defmacro apply-filtered
  "Expands to a lazy seq that pairs the elements of `a` and `b` by index and,
  for each pair (n, m), yields `(op n m)` when the predicate accepts the pair
  and nil otherwise. The result has as many elements as `a`.

  The predicate is a two-argument function given after a marker token
  (conventionally `:when`); only the form following the marker is used. When
  no predicate is supplied, `op` is applied to every pair.

  `a` and `b` are each evaluated exactly once, so they may be arbitrarily
  expensive expressions.

  e.g. (apply-filtered / [1 2 3] [1 0 3] :when (fn [n m] (> m 0)))
       => (1 nil 1)"
  [op a b & condition]
  (let [pred (or (second condition) `(constantly true))]
    ;; Bind both inputs up front: splicing ~a / ~b straight into the loop
    ;; body would re-run those expressions for every single element.
    `(let [xs# ~a
           ys# ~b]
       (for [idx# (range (count xs#))]
         (let [n# (nth xs# idx#)
               m# (nth ys# idx#)]
           (when (~pred n# m#)
             (~op n# m#)))))))
(defmacro apply-rows
  "Applies a binary operation between every row of the dataset `data` (its
  :Date column excluded) and a reference vector, and returns a new dataset
  with the original column names followed by :Date.

  data      - dataset with a :Date column; evaluated once.
  operation - a two-element form `(op reference)`. For each cell, `op` is
              called as `(op cell reference-cell)`. `reference` is evaluated
              once per row and may refer to `i`, the index of the row being
              processed, which this macro exposes on purpose.
  start     - index of the first row to process; earlier rows, and their
              dates, are left out of the result.
  cond      - two-argument predicate `(fn [n m] ...)`; cells for which it
              returns false become nil.

  e.g. divide each row by the vector [1 2 3], starting from row 0 and
  skipping non-positive divisors:
    (apply-rows ds (/ [1 2 3]) 0 (fn [n m] (> m 0)))"
  [data operation start cond]
  `(let [data#      ~data
         start#     ~start
         raw-data#  (incanter.core/$ :all [:not :Date] data#)
         raw-cols#  (incanter.core/col-names raw-data#)
         all-dates# (incanter.core/$ :all :Date data#)
         ;; Rows before start# are not emitted, so their dates must be
         ;; skipped too; otherwise every result row would be paired with an
         ;; earlier date. A non-sequential value is passed through untouched.
         dates#     (if (sequential? all-dates#)
                      (drop start# all-dates#)
                      all-dates#)]
     (incanter.core/col-names
       (incanter.core/conj-cols
         (for [~'i (range start# (incanter.core/nrow raw-data#))]
           (apply-filtered
             ~(first operation)
             (vec (incanter.core/$ ~'i [:not :Date] raw-data#))
             ~(second operation)
             :when ~cond))
         dates#)
       (conj raw-cols# :Date))))
(defn normalize
  "Returns a dataset in which every non-:Date value of `ds` is divided by the
  value found in the same column of the first row. A cell becomes nil when
  either operand is nil or the first-row value is not greater than zero."
  [ds]
  (let [baseline (vec (incanter.core/$ 0 [:not :Date] ds))
        usable?  (fn [n m]
                   (and (not (nil? n))
                        (not (nil? m))
                        (> m 0)))]
    (apply-rows ds (/ baseline) 0 usable?)))
(defn daily-rets
  "Returns a dataset of daily returns: starting at the second row of `data`,
  each non-:Date value n is replaced by (n / m) - 1, where m is the value in
  the same column of the previous row. A cell becomes nil when either value
  is nil or m is not greater than zero."
  [data]
  (let [pct-change (fn [n m] (dec (/ n m)))
        usable?    (fn [n m]
                     (and (not (nil? n))
                          (not (nil? m))
                          (> m 0)))]
    ;; `i` is the current row index exposed by the apply-rows macro.
    (apply-rows data
                (pct-change (vec (incanter.core/$ (dec i) [:not :Date] data)))
                1
                usable?)))
(defn run
  "Runs the tutorial end to end: loads 2012 adjusted-close prices for a fixed
  list of symbols from the \"Yahoo\" data source and opens four windows -- the
  raw adjusted closes, the normalized closes, the daily returns, and a
  scatter plot of normalized AAPL against GOOG."
  []
  (let [symbols        ["AAPL" "GLD" "GOOG" "$SPX" "XOM"]
        time-of-day    (hours 16)
        timestamps     (get-NYSE-days (date-time 2012 1 1)
                                      (date-time 2012 12 31)
                                      time-of-day)
        symbols-data   (read-symbols-data "Yahoo" symbols)
        adj-close-data (incanter.core/to-dataset
                         (get-data timestamps
                                   symbols
                                   (keyword "Adj Close")
                                   symbols-data
                                   time-of-day))
        ;; Displays one multi-line chart of every symbol over time.
        show-series    (fn [title y-label data]
                         (incanter.core/view
                           (multi-series-chart {:series  symbols
                                                :x-label "Date"
                                                :y-label y-label
                                                :title   title
                                                :data    data})))]
    ;; first chart of the tutorial
    (show-series "Adjusted close data" "Adjusted Close" adj-close-data)
    ;; second chart of the tutorial
    (show-series "Normalized close data" "Adjusted Close" (normalize adj-close-data))
    ;; daily returns
    (show-series "Daily Returns" "Return" (daily-rets adj-close-data))
    ;; quotes comparison with scatter plot
    (incanter.core/view
      (incanter.charts/scatter-plot "AAPL" "GOOG"
                                    :x-label "AAPL"
                                    :y-label "GOOG"
                                    :legend true
                                    :data (normalize adj-close-data)))))
(defn -main
  "Command-line entry point: ignores its arguments and delegates to `run`."
  [& args]
  (run))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment