Skip to content

Instantly share code, notes, and snippets.

(defn generate-other-words [output-tap root date]
(let [others (generate-others root)
landings (landings-by-kw root)
sq (<- [?ngram ?other ?total-l ?total-pv ?total-b ?total-nv ?total-rv
?b-rate ?nv-rate ?rv-rate ?avg-depth]
(others ?ngram ?other ?kw)
(landings ?kw ?l ?pv ?b ?nv ?rv)
(ybot-stats ?l ?pv ?b ?nv ?rv :>
?total-l ?total-pv ?total-b ?total-nv ?total-rv
?b-rate ?nv-rate ?rv-rate ?avg-depth))]
(defn generate-others
  "Builds a query emitting [?ngram ?other ?kw] tuples. Every non-empty keyword
  read from the GA data tap under `root` is passed to `other-ngrams` (with 4 as
  its second argument) to produce ?ngram/?other pairs; only pairs whose ?ngram
  is also present in the kw-stats tap are kept."
  [root]
  (let [ga-keywords  (select-fields (gadata-tap root) ["!kw"])
        known-ngrams (select-fields (kw-stats-tap root) "?ngram")]
    (<- [?ngram ?other ?kw]
        (ga-keywords ?kw)
        ;; sharing ?ngram with this generator joins on kw-stats, which
        ;; filters out n-grams that are not listed there
        (known-ngrams ?ngram)
        (not= ?kw "")
        (other-ngrams ?kw 4 :> ?ngram ?other))))
(defn generate-other-words [output-tap root date]
;; Generates the class ybot.transfer.GlobPathLister, which implements
;; backtype.hadoop.PathLister. Method implementations are looked up among the
;; functions in this namespace whose names start with "lister-".
(gen-class
 :name ybot.transfer.GlobPathLister
 :prefix "lister-"
 :implements [backtype.hadoop.PathLister])

(defn lister-getFiles
  "Implementation function for `getFiles` on the generated class. Expands `p`
  as a glob on `fs` and returns a lazy sequence containing, for every glob
  result, a single-element vector holding that result's path.
  NOTE(review): confirm that consumers of PathLister expect each path wrapped
  in a vector rather than the bare Path."
  [this ^FileSystem fs ^Path p]
  (map (fn [file-status] [(.getPath file-status)])
       (.globStatus fs p)))
;; Generates the class ybot.hadoop.formats.SimpleRecordStreamFactory. Its
;; interface methods dispatch to the "recordfactory-"-prefixed functions below.
;; :implements takes a vector of interfaces, even when there is only one.
(gen-class
 :name ybot.hadoop.formats.SimpleRecordStreamFactory
 :implements [backtype.hadoop.formats.RecordStreamFactory]
 :prefix "recordfactory-")

(defn recordfactory-getInputStream
  "Opens `p` on `fs` and wraps the resulting stream in a SimpleInputStream.
  gen-class invokes implementation functions with the instance as the first
  argument, so the generated class uses the three-argument arity (`this` is
  ignored). The two-argument arity is retained for direct callers."
  ([^FileSystem fs ^Path p]
   (SimpleInputStream. (.open fs p)))
  ([this ^FileSystem fs ^Path p]
   (recordfactory-getInputStream fs p)))

(defn recordfactory-getOutputStream
  "Creates `p` on `fs` and wraps the resulting stream in a SimpleOutputStream.
  gen-class invokes implementation functions with the instance as the first
  argument, so the generated class uses the three-argument arity (`this` is
  ignored). The two-argument arity is retained for direct callers."
  ([^FileSystem fs ^Path p]
   (SimpleOutputStream. (.create fs p)))
  ([this ^FileSystem fs ^Path p]
   (recordfactory-getOutputStream fs p)))
(ns ybot.kwphrase-tests
(:use [ybot.analytics.ga kwphrase]
[ybot datastores]
[midje sweet cascalog]))
(let [ga-data [["20121025"
"http://dopeness.org"
"United States"
"Oregon"
"Portland"
(def sample-variance
"Predicate macro that calculates the sample variance of the supplied input
var."
(<- [!val :> !var]
(* !val !val :> !squared)
(c/sum !squared :> !squared-sum)
(c/count !count)
(c/sum !val :> !sum)
(c/avg !val :> !mean)
(* !sum !mean :> !i)
package forma.tap;
import backtype.hadoop.pail.PailStructure;
import java.util.Collections;
import java.util.List;
import org.apache.thrift.TBase;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
@sorenmacbeth
sorenmacbeth / core.clj
Created March 5, 2012 20:39
You have to add any namespaces with (gen-class) forms to the :aot key in your Leiningen project file. Next, you have to make sure you run `lein compile` to actually generate the class.
(ns lein-maybe-bug.core
(:import leinmaybebug.SomeClass))
;; Generates the class lein-maybe-bug.SomeGenClass; implementation functions
;; are looked up under the "somegenclass-" prefix.
(gen-class
 :prefix "somegenclass-"
 :name lein-maybe-bug.SomeGenClass)

(defn somegenclass-test
  "Returns a newly constructed SomeClass; `this` is ignored.
  NOTE(review): the gen-class form above declares no :methods and no
  interfaces, so it is unclear whether a `test` method is actually exposed on
  the generated class — confirm."
  [this]
  (new SomeClass))
(let [pedigree (help/mk-pedigree (co/to-long (t/date-time 1979 11 13)))
session-path [[1 "http://dopeness.org/" "http://dopeness.org/" 1]
[2 "http://dopeness.org/" "http://dopeness.org/3" 3]]
pageviews [[1 "http://dopeness.org/" pedigree]
[2 "http://dopeness.org/" pedigree]
[2 "http://dopeness.org/2" pedigree]
[2 "http://dopeness.org/3" pedigree]]
visit-types [[1 true pedigree]
[2 false pedigree]]]
(fact
(defn counts-per-day
  "Query producing [!intent !bucket !count] from `intent-tap`. For each tuple,
  `g/extract-true-as-of` is applied to the third field and the result is fed,
  together with the unit list [\"d\"], to `o/time-buckets` to obtain !bucket;
  !count is the number of tuples per !intent/!bucket combination.
  NOTE(review): \"d\" presumably selects day-sized buckets — confirm against
  o/time-buckets."
  [intent-tap]
  (<- [!intent !bucket !count]
      ;; the first field of the tap is not needed here
      (intent-tap _ !intent !pedigree)
      (g/extract-true-as-of !pedigree :> !true-as-of)
      (o/time-buckets ["d"] !true-as-of :> !bucket)
      (c/count !count)))
(defn avg-sd [counts-per-day-sq min-avg]
(<- [!intent !a !sd]
(counts-per-day-sq !intent !bucket !count)