Skip to content

Instantly share code, notes, and snippets.

;; Cascalog map operation: parses one JSON string and emits a 1-tuple holding
;; the value found under :timestamp in the parsed result (nil when absent).
;; NOTE(review): the `true` argument presumably tells clj-json to keywordize
;; map keys -- the :timestamp lookup relies on that; confirm against clj-json.
(defmapop decode-raw-ga [json]
  (-> json
      (clj-json.core/parse-string true)
      :timestamp
      vector))
(defn glob-json-data
  "Returns a Cascalog query over the text lines read from `path`.

  Each line is passed through `decode-raw-ga`, and the query emits the
  resulting !timestamp field. `(:distinct false)` is set on the query, so
  duplicate timestamps are not collapsed."
  [path]
  (let [line-source (hfs-textline path)]
    (<- [!timestamp]
        (line-source !raw-line)
        (decode-raw-ga !raw-line :> !timestamp)
        (:distinct false))))
@sorenmacbeth
sorenmacbeth / index.html
Created July 13, 2011 02:56
slopegraph
<!DOCTYPE html>
<html>
<head>
<title>Slopegraph.js</title>
<style type="text/css">
</style>
<script src="http://github.com/mbostock/d3/raw/master/d3.min.js"></script>
<script type="text/javascript">
var dataset=[[10, 100], [430, 441], [103, 89], [228, 62], [393, 258], [250, 167], [290, 322], [480, 17], [94, 27], [116, 163]]
var widthOfCanvas = '500';
(defn landings [root]
(let [ybtag (select-fields (ybtag-tap root) ["!psn" "!ts" "!pvi" "!lo" "!ln" "!g_C" "!g_r" "!g_c" "!r"])
landing-sq (<- [!psn !datestr !g_C !g_r !g_c !kw !ref !lo !pvi ?c]
(ybtag !psn !ts !pvi !lo !ln !g_C !g_r !g_c !r)
(long->datestr !ts :> !datestr)
(parse-referrer !r :> !ref)
(extract-kw !r :> !kw)
(= !ln "true")
(c/count ?c))]
(<- [!psn !datestr !g_C !g_r !g_c !kw !r !lo ?s]
(defn do-join [root]
(let [pv (pageviews root)
lnd (landings root)
nv (new-visits root)
rv (return-visits root)
b (bounces root)]
(<- [?json]
(pv !pub !datestr !country !region !city !kw !ref !url !!pvs)
(lnd !pub !datestr !country !region !city !kw !ref !url !!lnds)
(nv !pub !datestr !country !region !city !kw !ref !url !!nvs)
(defn bounces [root]
(let [ybtag (select-fields (ybtag-tap root) ["!psn" "!pvi" "!lo" "!g_C" "!g_r" "!g_c" "!r" "!b"])
b-sq (<- [!pub !pv-id !b]
(ybtag !pub !pv-id _ _ _ _ _ !b)
(not= !b nil))
rest-sq (<- [!pub !pv-id !country !region !city !kw !ref !url]
(ybtag !pub !pv-id !url !country !region !city !r _)
(not= !r nil)
(expand-fields !r :> !kw !ref))
bounces-sq (<- [!pub !country !region !city !kw !ref !url !pv-id ?c]
(defn generate-other-words [output-tap root date]
(let [others (generate-others root)
landings (landings-by-kw root)
sq (<- [?ngram ?other ?total-l ?total-pv ?total-b ?total-nv ?total-rv
?b-rate ?nv-rate ?rv-rate ?avg-depth]
(others ?ngram ?other !kw)
(landings !kw ?l ?pv ?b ?nv ?rv)
(ybot-stats ?l ?pv ?b ?nv ?rv :>
?total-l ?total-pv ?total-b ?total-nv ?total-rv
?b-rate ?nv-rate ?rv-rate ?avg-depth))]
def _launch_cluster_instances(self, instance_templates):
singleton_hosts = []
for instance_template in instance_templates:
instance_template.add_env_strings(singleton_hosts)
instances = self._launch_instances(instance_template)
if instance_template.number == 1:
if len(instances) != 1:
logger.error("Expected a single '%s' instance, but found %s.",
"".join(instance_template.roles), len(instances))
return
def launch_slaves(self, instance_template):
instances = self.cluster.check_running(NAMENODE, 1)
if not instances:
return
master = instances[0]
for role in (NAMENODE, JOBTRACKER):
singleton_host_env = "%s_HOST=%s" % \
(self._sanitize_role_name(role), master.public_ip)
instance_template.add_env_strings(singleton_host_env)
print singleton_host_env
(defn ngram-counts
  "Returns a Cascalog query emitting [?ngram ?count] pairs.

  Reads the \"!kw\" field from the tap built by `(gadata-tap root)`, drops
  rows whose keyword is the empty string, expands each remaining keyword with
  `gen-ngrams`, and counts the rows per ?ngram."
  [root]
  (let [kw-source (-> root
                      gadata-tap
                      (select-fields "!kw"))]
    (<- [?ngram ?count]
        (kw-source !kw)
        (not= !kw "")
        ;; NOTE(review): 4 is presumably the maximum n-gram length -- confirm
        ;; against the definition of gen-ngrams.
        (gen-ngrams !kw 4 :> ?ngram)
        (:sort ?ngram)
        (c/count ?count))))
(defbufferop collapse-ngrams [tuples]
(defn landings [root]
(let [ybtag (select-fields (ybtag-tap root) ["!psn" "!pvi" "!lo" "!ln" "!g_C" "!g_r" "!g_c" "!r" "!nv"])
landing-sq (<- [!pub !country !region !city !kw !ref !url !pv-id ?c]
(ybtag !pub !pv-id !url !ln !country !region !city !r !nv)
(not= !r nil)
(expand-fields !r :> !kw !ref)
(= !ln "true")
(= !nv "1")
(c/count ?c))]
(<- [!pub !country !region !city !kw !ref !url ?s]