(use '(incanter core stats charts io))

;; Load the employees CSV over HTTP. `:header true` makes the first row the
;; column names, which is what lets the code below select :requested and
;; :received by keyword.
(def salaries
  (read-dataset
    "http://www.headfirstlabs.com/books/hfda/hfda_ch10_employees.csv"
    :header true))

;; Scatter plot of requested (x axis) against received (y axis), restricted to
;; the rows where the negotiated column is TRUE.
(def salary-plot
  ;; Filter once and reuse the result for both axes: the negotiated flag is the
  ;; 4th column (index 3) and is compared against the string "TRUE".
  (let [negotiated (sel salaries :filter #(= (nth % 3) "TRUE"))]
    (scatter-plot (sel negotiated :cols :requested)
                  (sel negotiated :cols :received))))

;; Open the chart in a window.
(view salary-plot)

;;; Calculate the correlation coefficient between requested and received,
;;; using only the rows where the negotiated column (index 3) is "TRUE".
;;; The rows are filtered once and both columns are taken from that subset.
(let [negotiated (sel salaries :filter #(= (nth % 3) "TRUE"))]
  (correlation (sel negotiated :cols :requested)
               (sel negotiated :cols :received)))
;;; => 0.6656481025557299

;;; Calculate the Linear Model: received (response) as a function of
;;; requested (predictor), on the rows where the negotiated column is "TRUE".
;;; Incanter's linear-model takes the response y FIRST and the predictor x
;;; second, so :received has to be passed before :requested for the fit to
;;; match the scatter plot (x = requested, y = received).
(def salary-lm
  (let [negotiated (sel salaries :filter #(= (nth % 3) "TRUE"))]
    (linear-model (sel negotiated :cols :received)
                  (sel negotiated :cols :requested))))

;;; The Coefficients, as (intercept slope)
(:coefs salary-lm)
;;; NOTE(review): the output recorded here before,
;;;   (3.0297198624654484 0.6110990511023138)
;;; came from the reversed fit (requested as a function of received).
;;; Re-evaluate to record the new values; the slope should come out near
;;; r^2 / 0.6111 ~= 0.725 given the correlation of 0.6656 computed above.

;; add regression line to scatter plot
;; The line y = intercept + slope * x is built from the fitted coefficients
;; instead of hand-copied, rounded constants, so it always agrees with salary-lm.
(defn reg-line
  "Predicted received value for a requested value x, using the intercept and
  slope stored in salary-lm."
  [x]
  (let [[intercept slope] (:coefs salary-lm)]
    (+ intercept (* slope x))))

;; Draw the line over the x range 0 to 22.
(add-function salary-plot reg-line 0 22)