-
-
Save eightysteele/7627b0c9cdf4c9e551b7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns mol | |
(:use cascalog.api) | |
(:require [clojure.string :as s] | |
[cascalog.ops :as c] | |
[clojure.java.io :as io])) | |
;; Tab (\t) separated text file:
;;
;; gbif-id latitude longitude nub-id name
;; 236499193 24.663458 121.61801 1986930 Abaciscus
;; 236499220 23.455833 120.89278 1986930 Abaciscus
;; 236499202 24.663458 121.61801 1986930 Abaciscus
;;
(def gbif-points-path
  "The test data set, testdata/mol-1000.txt, looked up as a resource and
  coerced to a file via clojure.java.io."
  (-> "testdata/mol-1000.txt"
      io/resource
      io/as-file))
(defn to-floats
  "Converts each argument to a java.lang.Float via the Float constructor and
  returns the results as a lazy sequence, in argument order.

  Called from Cascalog queries with the string fields produced by splitting a
  text line, e.g. (to-floats ?lat ?lon :> ?lat-float ?lon-float)."
  ;; The docstring must come before the parameter vector; placed after it,
  ;; Clojure treats the string as a discarded body expression and the var
  ;; ends up with no :doc metadata.
  [& xs]
  (map #(Float. %) xs))
(defn make-dwc-tuples
  "Returns a Cascalog query that emits [lat lon name count] tuples, where lat
  and lon are floats and count is the number of times the name appears in the
  file.

  path is handed to hfs-textline; each text line is split on tab characters
  into five fields, of which the 2nd, 3rd and 5th are used as latitude,
  longitude and name."
  ;; Docstring moved ahead of the parameter vector so it is attached to the
  ;; var as :doc metadata instead of being evaluated and thrown away.
  [path]
  (let [src (hfs-textline path)
        ;; Sub-query: one [?name ?species-count] tuple per distinct name.
        count-q (<- [?name ?species-count]
                    (src ?textline)
                    (s/split ?textline #"\t" :> _ _ _ _ ?name)
                    (c/count ?species-count))]
    ;; Main query: re-read every line and join it to its name's count
    ;; through the shared ?name variable.
    (<- [?lat-float ?lon-float ?name ?species-count]
        (src ?textline)
        (s/split ?textline #"\t" :> _ ?lat ?lon _ ?name)
        (count-q ?name ?species-count)
        (to-floats ?lat ?lon :> ?lat-float ?lon-float)
        ;; Keep duplicate output tuples rather than de-duplicating them.
        (:distinct false))))
(defn run-mol-job
  "Executes the query built by make-dwc-tuples over gbif-points-path and
  sinks the resulting tuples to the (stdout) tap."
  []
  (let [sink  (stdout)
        query (make-dwc-tuples gbif-points-path)]
    (?- sink query)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment