Skip to content

Instantly share code, notes, and snippets.

@danhammer
Created March 7, 2012 20:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danhammer/1995731 to your computer and use it in GitHub Desktop.
Save danhammer/1995731 to your computer and use it in GitHub Desktop.
cascalog queries for final clearing probabilities
(ns forma.hadoop.jobs.forma
(:use cascalog.api)
(:require [cascalog.ops :as c]
[forma.trends.analysis :as a]
[forma.classify.logistic :as log]))
;; Predicate macro taking a single ?chunk and producing six outputs.
;;
;; The chunk's :location and :value entries are read with a plain `map`
;; over the two keys; the location is then expanded by
;; `schema/unpack-pixel-location` into ?s-res ?mod-h ?mod-v ?sample ?line.
;; The sixth output, ?val, is the chunk's :value entry passed through as-is.
(def get-loc
  (<- [?chunk :> ?s-res ?mod-h ?mod-v ?sample ?line ?val]
      (map ?chunk [:location :value] :> ?pixel-loc ?val)
      (schema/unpack-pixel-location ?pixel-loc
                                    :> ?s-res ?mod-h ?mod-v ?sample ?line)))
(defn fire-tap
  "Builds a query over `fire-src`, a generator of one-field tuples, each
  holding a fire chunk.

  Every chunk is split by `get-loc` into its pixel location fields and
  its value; the value is then handed, together with `est-map`, to
  `schema/adjust-fires`.

  Emits ?s-res ?mod-h ?mod-v ?sample ?line ?fire-series.

  Caveat kept from the original author: this won't work when pulling
  directly from the pail."
  [est-map fire-src]
  (<- [?s-res ?mod-h ?mod-v ?sample ?line ?fire-series]
      (fire-src ?fire-chunk)
      (get-loc ?fire-chunk :> ?s-res ?mod-h ?mod-v ?sample ?line ?raw-fires)
      (schema/adjust-fires est-map ?raw-fires :> ?fire-series)))
(defn filter-query
  "Joins timeseries chunks from `chunk-src` with VCF chunks from
  `vcf-src` on pixel location and keeps only pixels whose VCF value is
  greater than or equal to `vcf-limit`.

  Both sources are read as two-field tuples; the first field is ignored.
  The timeseries chunk is split by `get-loc`, and its value is read as a
  map with :start-idx and :series entries. The VCF chunk is expanded per
  pixel by `p/blossom-chunk`.

  Emits ?s-res ?mod-h ?mod-v ?sample ?line ?start ?ts. Duplicate tuples
  are not removed (:distinct false)."
  [vcf-src vcf-limit chunk-src]
  (<- [?s-res ?mod-h ?mod-v ?sample ?line ?start ?ts]
      (chunk-src _ ?series-chunk)
      (vcf-src _ ?forest-chunk)
      (get-loc ?series-chunk :> ?s-res ?mod-h ?mod-v ?sample ?line ?series-map)
      (map ?series-map [:start-idx :series] :> ?start ?ts)
      (p/blossom-chunk ?forest-chunk
                       :> ?s-res ?mod-h ?mod-v ?sample ?line ?forest-cover)
      (>= ?forest-cover vcf-limit)
      (:distinct false)))
(defn dynamic-filter
  "Returns a new generator that joins the ndvi, reliability and rain
  timeseries sources on pixel location (?s-res ?mod-h ?mod-v ?sample
  ?line) and passes the three (start, series) pairs through
  `schema/adjust`, which yields one shared ?start-idx plus the three
  adjusted series.

  No VCF filtering happens here; each source is expected to already
  emit tuples of the form location fields, start index, series (the
  shape produced by `filter-query`).

  Emits ?s-res ?mod-h ?mod-v ?sample ?line ?start-idx ?ndvi-ts
  ?precl-ts ?reli-ts. Duplicate tuples are not removed."
  [ndvi-src reli-src rain-src]
  (<- [?s-res ?mod-h ?mod-v ?sample ?line ?start-idx ?ndvi-ts ?precl-ts ?reli-ts]
      (:distinct false)
      (ndvi-src ?s-res ?mod-h ?mod-v ?sample ?line ?ndvi-start ?ndvi-raw)
      (reli-src ?s-res ?mod-h ?mod-v ?sample ?line ?reli-start ?reli-raw)
      (rain-src ?s-res ?mod-h ?mod-v ?sample ?line ?rain-start ?rain-raw)
      ;; Argument order is rain, ndvi, reli; output order is
      ;; start, rain, ndvi, reli.
      (schema/adjust ?rain-start ?rain-raw
                     ?ndvi-start ?ndvi-raw
                     ?reli-start ?reli-raw
                     :> ?start-idx ?precl-ts ?ndvi-ts ?reli-ts)))
(defn dynamic-tap
  "Accepts an est-map and `dynamic-src`, a generator of per-pixel tuples
  carrying location fields, a start index, and ndvi, rain (precl) and
  reliability timeseries, in that order.

  For each pixel, `short-trend-shell` is applied to the start index,
  ndvi and reliability series, and `long-trend-shell` to the same inputs
  plus the rain series. The first output of `long-trend-shell` is
  discarded.

  Emits ?s-res ?mod-h ?mod-v ?sample ?line ?new-start ?short ?break
  ?long ?t-stat. Duplicate tuples are not removed.

  NOTE(review): `short-trend-shell` and `long-trend-shell` are not
  defined in the visible part of this file - confirm they are in scope."
  [est-map dynamic-src]
  (<- [?s-res ?mod-h ?mod-v ?sample ?line ?new-start ?short ?break ?long ?t-stat]
      (:distinct false)
      (dynamic-src ?s-res ?mod-h ?mod-v ?sample ?line
                   ?ts-start ?ndvi-ts ?precl-ts ?reli-ts)
      (short-trend-shell est-map ?ts-start ?ndvi-ts ?reli-ts
                         :> ?new-start ?short)
      (long-trend-shell est-map ?ts-start ?ndvi-ts ?reli-ts ?precl-ts
                        :> _ ?break ?long ?t-stat)))
(defn forma-tap
  "Combines `dynamic-src` (per-pixel start index plus short, break, long
  and t-stat values) with `fire-src` (per-pixel fire series) on pixel
  location.

  The fire value is bound to an ungrounding (`!!`) variable, so under
  Cascalog's outer-join rules a pixel with no fire tuple should still be
  emitted with a nil fire value - TODO confirm against real data.

  `schema/forma-seq` merges the fire, short, break, long and t-stat
  values into one sequence, which `p/index` then splits into one tuple
  per element, numbering them from the pixel's start index.

  Emits ?s-res ?period ?mh ?mv ?s ?l ?forma-val. Duplicate tuples are
  not removed."
  [dynamic-src fire-src]
  (<- [?s-res ?period ?mh ?mv ?s ?l ?forma-val]
      (:distinct false)
      (fire-src ?s-res ?mh ?mv ?s ?l !!fire-series)
      (dynamic-src ?s-res ?mh ?mv ?s ?l
                   ?first-period ?short ?break ?long ?t-stat)
      (schema/forma-seq !!fire-series ?short ?break ?long ?t-stat
                        :> ?forma-vals)
      (p/index ?forma-vals :zero-index ?first-period :> ?period ?forma-val)))
(defmapcatop [process-neighbors [num-neighbors]]
  "Walks `window` with `w/neighbor-scan` and, for every position whose
  own value is truthy, emits a 3-tuple: the position's index within the
  scan, the value itself, and the result of `schema/combine-neighbors`
  applied to the flattened, truthy neighbor values. Positions with a
  falsey value emit nothing."
  [window]
  (letfn [(summarize [idx [center surrounding]]
            ;; Returning nil makes keep-indexed drop the position.
            (when center
              [idx
               center
               (schema/combine-neighbors
                (filter identity (apply concat surrounding)))]))]
    (keep-indexed summarize (w/neighbor-scan num-neighbors window))))
(defn forma-query
  "Final query: walks the neighbors of every pixel and emits each
  pixel's value alongside the combined value of its neighbors.

  `est-map` supplies :neighbors (passed to `process-neighbors`) and
  :window-dims, a two-element [rows cols] collection. `forma-val-src` is
  windowed by `p/sparse-windower` over its \"?sample\" and \"?line\"
  fields, collecting \"?forma-val\" with nil as the missing value. Each
  window is scanned by `process-neighbors`, and `r/tile-position` turns
  the window coordinates and in-window index back into ?sample and ?line.

  Emits ?s-res ?period ?mod-h ?mod-v ?sample ?line ?val ?neighbor-val.
  Duplicate tuples are not removed.

  NOTE(review): the windower is given the field names \"?sample\" and
  \"?line\" - confirm `forma-val-src` exposes fields under those names."
  [est-map forma-val-src]
  (let [{neighbors                      :neighbors
         [rows cols :as window-dims]    :window-dims} est-map
        windowed-src (p/sparse-windower forma-val-src
                                        ["?sample" "?line"]
                                        window-dims
                                        "?forma-val"
                                        nil)]
    (<- [?s-res ?period ?mod-h ?mod-v ?sample ?line ?val ?neighbor-val]
        (:distinct false)
        (windowed-src ?s-res ?period ?mod-h ?mod-v ?win-col ?win-row ?win)
        (process-neighbors [neighbors] ?win :> ?idx-in-win ?val ?neighbor-val)
        (r/tile-position cols rows ?win-col ?win-row ?idx-in-win
                         :> ?sample ?line))))
(defn beta-generator
  "Query returning the beta vector associated with each ecoregion.

  The first argument is a map with the keys :t-res, :est-start,
  :ridge-const, :convergence-thresh and :max-iterations. :t-res and
  :est-start are converted by `date/datetime->period` into a period
  index, and only `dynamic-src` tuples from exactly that period are used.

  Those tuples are joined with `static-src` on pixel location to pick up
  the ecoregion and hansen values. `log/logistic-beta-wrap`,
  parameterized with the ridge constant, convergence threshold and
  iteration cap, aggregates hansen, value and neighbor value into ?beta.

  Emits ?s-res ?eco ?beta. Duplicate tuples are not removed."
  [{:keys [t-res est-start ridge-const convergence-thresh max-iterations]}
   dynamic-src static-src]
  (let [training-period (date/datetime->period t-res est-start)]
    (<- [?s-res ?eco ?beta]
        (:distinct false)
        (dynamic-src ?s-res ?period ?mod-h ?mod-v ?s ?l ?val ?neighbor-val)
        (static-src ?s-res ?mod-h ?mod-v ?s ?l _ _ ?eco ?hansen)
        (= ?period training-period)
        (log/logistic-beta-wrap
         [ridge-const convergence-thresh max-iterations]
         ?hansen ?val ?neighbor-val :> ?beta))))
(defn forma-estimate
  "Query to end all queries: produces a probability series per pixel.

  `beta-src` supplies one beta vector per (?s-res, ecoregion) pair.
  `static-src` supplies each pixel's ecoregion, which links the pixel's
  `dynamic-src` tuples to the matching beta. `log/logistic-prob-wrap`
  turns beta, value and neighbor value into a probability, and
  `log/mk-timeseries` aggregates the (period, probability) pairs of a
  pixel into ?prob-series.

  Emits ?s-res ?mod-h ?mod-v ?s ?l ?prob-series. Duplicate tuples are
  not removed.

  NOTE(review): no period filter is applied here, so restricting the
  estimate to periods after the training period must happen upstream -
  confirm against the caller."
  [beta-src dynamic-src static-src]
  (<- [?s-res ?mod-h ?mod-v ?s ?l ?prob-series]
      (:distinct false)
      (beta-src ?s-res ?ecoregion ?beta-vec)
      (dynamic-src ?s-res ?period ?mod-h ?mod-v ?s ?l ?val ?neighbor-val)
      (static-src ?s-res ?mod-h ?mod-v ?s ?l _ _ ?ecoregion _)
      (log/logistic-prob-wrap ?beta-vec ?val ?neighbor-val :> ?prob)
      (log/mk-timeseries ?period ?prob :> ?prob-series)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment