Skip to content

Instantly share code, notes, and snippets.

(defn generate-other-words [output-tap root date]
(let [others (generate-others root)
landings (landings-by-kw root)
sq (<- [?ngram ?other ?total-l ?total-pv ?total-b ?total-nv ?total-rv
?b-rate ?nv-rate ?rv-rate ?avg-depth]
(others ?ngram ?other !kw)
(landings !kw ?l ?pv ?b ?nv ?rv)
(ybot-stats ?l ?pv ?b ?nv ?rv :>
?total-l ?total-pv ?total-b ?total-nv ?total-rv
?b-rate ?nv-rate ?rv-rate ?avg-depth))]
def _launch_cluster_instances(self, instance_templates):
singleton_hosts = []
for instance_template in instance_templates:
instance_template.add_env_strings(singleton_hosts)
instances = self._launch_instances(instance_template)
if instance_template.number == 1:
if len(instances) != 1:
logger.error("Expected a single '%s' instance, but found %s.",
"".join(instance_template.roles), len(instances))
return
def launch_slaves(self, instance_template):
instances = self.cluster.check_running(NAMENODE, 1)
if not instances:
return
master = instances[0]
for role in (NAMENODE, JOBTRACKER):
singleton_host_env = "%s_HOST=%s" % \
(self._sanitize_role_name(role), master.public_ip)
instance_template.add_env_strings(singleton_host_env)
print singleton_host_env
(defn ngram-counts
  "Builds a Cascalog query that yields [?ngram ?count] tuples.

  Selects the \"!kw\" field from (gadata-tap root), discards rows whose
  keyword is the empty string, expands each remaining keyword with
  gen-ngrams (called with 4 -- presumably the maximum n-gram size; confirm
  against gen-ngrams), and counts the tuples produced for each ?ngram."
  [root]
  (let [keyword-source (-> root
                           gadata-tap
                           (select-fields "!kw"))]
    (<- [?ngram ?count]
        (keyword-source !kw)
        (not= !kw "")
        (gen-ngrams !kw 4 :> ?ngram)
        (:sort ?ngram)
        (c/count ?count))))
(defbufferop collapse-ngrams [tuples]
(defn landings [root]
(let [ybtag (select-fields (ybtag-tap root) ["!psn" "!pvi" "!lo" "!ln" "!g_C" "!g_r" "!g_c" "!r" "!nv"])
landing-sq (<- [!pub !country !region !city !kw !ref !url !pv-id ?c]
(ybtag !pub !pv-id !url !ln !country !region !city !r !nv)
(not= !r nil)
(expand-fields !r :> !kw !ref)
(= !ln "true")
(= !nv "1")
(c/count ?c))]
(<- [!pub !country !region !city !kw !ref !url ?s]
@sorenmacbeth
sorenmacbeth / shaper_tests.clj
Created September 30, 2011 23:37
The first fact, using `provided`, succeeds, but the second, using `against-background`, fails.
(ns ybot.shaper-tests
(:use cascalog.api
ybot.analytics.yb.shaper
[ybot datastores]
[midje sweet cascalog]))
(let [tag-data [["jmblog"
"07409273223006096"
"http://www.optimizeandprophesize.com/jonathan_mendezs_blog/2007/02/optimize_your_y.html"
"United States"
Actual result did not agree with the checking function.
Actual result: java.lang.RuntimeException: java.lang.RuntimeException: java.lang.RuntimeException: java.lang.IllegalArgumentException: clojure.lang.LazySeq@745f is an invalid predicate.
cascalog.rules$gen_as_set_ungrounding_vars.invoke(rules.clj:509)
cascalog.rules$pred_clean_BANG_.invoke(rules.clj:527)
cascalog.rules$build_query.invoke(rules.clj:553)
cascalog.rules$build_rule.invoke(rules.clj:641)
ybot.analytics.yb.shaper$return_visits.invoke(shaper.clj:63)
ybot.shaper_tests$eval3695$fn__3696$fn__3697$fn__3698$fn__3701.invoke(shaper_tests.clj:35)
ybot.shaper_tests$eval3695$fn__3696$fn__3697$fn__3698.invoke(shaper_tests.clj:35)
ybot.shaper_tests$eval3695$fn__3696$fn__3697.invoke(shaper_tests.clj:35)
;; Runs expand-fields over four referrer strings and checks the resulting
;; [!ref !kw] rows: a google.co.in URL containing an ellipsis and literal
;; spaces, a google.com search URL, a non-search URL, and the empty string.
(let [referrers [["http://www.google.co.in.../search?hl=en&source=hp&q=farewell quotes&meta=&oq=farewell &aq=0&aqi=g10&aql=&gs_sm=c&gs_upl=1235l2985l0l6500l9l9l0l2l2l0l234l1451l0.2.5l7l0"]
                 ["http://www.google.com/search?q=farewell%20quotes"]
                 ["http://www.dopeness.org/foo"]
                 [""]]]
  (fact?<-
   ;; expected [!ref !kw] rows
   [[nil nil]
    ["www.google.com" "farewell quotes"]
    ["www.dopeness.org" ""]
    ["(direct)" ""]]
   [!ref !kw]
   (referrers ?r)
   (expand-fields ?r :> !kw !ref)))
(ns hbase.cascalog.core
(:require [cascalog.workflow :as w])
(:import [cascading.hbase HBaseTap HBaseScheme ByteHolder]
[cascading.tuple Fields]
org.apache.hadoop.hbase.util.Bytes))
(defn hbase-tap
  "Returns an HBaseTap over `table-name`.

  The tap's HBaseScheme is built from `key-field`, `column-family`, and any
  trailing `value-fields`; the key and value field names are each converted
  with cascalog.workflow/fields before being handed to the scheme."
  [table-name key-field column-family & value-fields]
  (let [key-fields (w/fields key-field)
        val-fields (w/fields value-fields)
        scheme     (HBaseScheme. key-fields column-family val-fields)]
    (HBaseTap. table-name scheme)))
Caused by: java.lang.NoSuchMethodError: clojure.lang.RT.keyword(Ljava/lang/String;Ljava/lang/String;)Lclojure/lang/Keyword;
at cascalog.ops__init.__init0(Unknown Source)
at cascalog.ops__init.<clinit>(Unknown Source)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:247)
at clojure.lang.RT.loadClassForName(RT.java:1578)
at clojure.lang.RT.load(RT.java:399)
at clojure.lang.RT.load(RT.java:381)
at clojure.core$load$fn__4519.invoke(core.clj:4915)
at clojure.core$load.doInvoke(core.clj:4914)