Skip to content

Instantly share code, notes, and snippets.

from redis import Redis
import simplejson
class Resque(object):
    """Dirt simple Resque client in Python. Can be used to create jobs.

    The Redis server to talk to is read from the class attribute
    ``redis_server``; override it on the class (or a subclass) before
    instantiating to target a different server.
    """

    # Address of the Redis server as 'host:port'. A bare 'host' is also
    # accepted, in which case the default Redis port is used.
    redis_server = 'localhost:6379'

    # Port used when ``redis_server`` does not specify one.
    _DEFAULT_PORT = 6379

    def __init__(self):
        """Create the Redis client for the server named by ``redis_server``."""
        host, port = self._parse_server(self.redis_server)
        self.redis = Redis(host=host, port=port)

    @classmethod
    def _parse_server(cls, address):
        """Split a 'host[:port]' string into a ``(host, port)`` tuple.

        Args:
            address: Server address such as ``'localhost:6379'`` or
                ``'localhost'``.

        Returns:
            A ``(host, port)`` tuple with ``port`` as an ``int``. When the
            address carries no port, ``_DEFAULT_PORT`` is used.

        Raises:
            ValueError: If the port part is present but not an integer.
        """
        # partition() never fails on a missing separator, unlike unpacking
        # the result of split(':'), so a port-less address stays usable.
        host, _, port = address.partition(':')
        return host, int(port) if port else cls._DEFAULT_PORT
-- Computes the tunkrank_score of one Twitter user from that user's followers:
-- each follower row contributes (1 + p * follower score) / (1 + follower
-- num_friends), and 1.0 is added to the total.
-- The p and twitter_id placeholders are substituted into the statement text
-- before it is executed.
-- NOTE(review): the placeholders look like Ruby string interpolation; confirm
-- both values are numeric/sanitized by the caller, since they are spliced
-- directly into the SQL.
-- NOTE(review): when no follower rows match, SUM yields NULL and so does the
-- whole expression; verify the caller handles that case.
SELECT 1.0 + SUM((1.0 + #{p} * tunkrank_score) / (1.0 + num_friends)) AS tunkrank_score
FROM twitter_users
-- Restrict twitter_users to the rows that appear as a follower in
-- twitter_id_follows for the target user selected in the WHERE clause.
INNER JOIN twitter_id_follows ON (twitter_users.twitter_id = twitter_id_follows.follower_twitter_id)
WHERE twitter_id_follows.user_twitter_id = #{twitter_id};
class TwitterUser
  # Returns this user's TunkRank score as a Float, computed from +followers+.
  #
  # Every follower contributes
  #   (1 + p * follower.tunkrank_score) / (1 + follower.num_friends)
  # and the contributions are added up starting from 0.0, so a user without
  # followers scores 0.0.
  #
  # p - weight applied to each follower's own tunkrank_score (default: 0.05).
  def calculate_tunkrank(p=0.05)
    followers.reduce(0.0) do |total, fan|
      contribution = (1.0 + (p * fan.tunkrank_score)) / (1.0 + fan.num_friends)
      total + contribution
    end
  end
end
@sorenmacbeth
sorenmacbeth / gist:827971
Created February 15, 2011 18:37 — forked from michaelmontano/gist:535794
updated to whirr-0.3.0
diff -Naur whirr-0.3.0-incubating/contrib/python/src/py/hadoop/cloud/cli.py whirr-0.3.0-incubating-backtype/contrib/python/src/py/hadoop/cloud/cli.py
--- whirr-0.3.0-incubating/contrib/python/src/py/hadoop/cloud/cli.py 2011-01-15 23:03:44.000000000 -0800
+++ whirr-0.3.0-incubating-backtype/contrib/python/src/py/hadoop/cloud/cli.py 2011-02-15 11:51:49.000000000 -0800
@@ -296,7 +296,7 @@
opt.get('user_data_file'),
opt.get('availability_zone'), opt.get('user_packages'),
opt.get('auto_shutdown'), opt.get('env'),
- opt.get('security_group'))
+ opt.get('security_group'), opt.get('spot_price'))
service.launch_master(template, config_dir, opt.get('client_cidr'))
(ns gist.globhfs
(:import [cascading.tap GlobHfs]))
;; ### Bucket to Cluster
;;
;; To get tuples back out of our directory structure on S3, we employ
;; Cascading's [GlobHfs](http://goo.gl/1Vwdo) tap, along with an
;; interface tailored for datasets stored in the MODIS sinusoidal
;; projection. For details on the globbing syntax, see
;; [here](http://goo.gl/uIEzu).
;; Map operation: parses one JSON-encoded string and emits the value stored
;; under :timestamp as a single-field tuple.
;; NOTE(review): the `true` argument presumably asks clj-json to keywordize
;; map keys (the result is read with :timestamp) -- confirm against the
;; clj-json version in use.
(defmapop decode-raw-ga [json]
  [(-> json
       (clj-json.core/parse-string true)
       :timestamp)])
(defn glob-json-data
  "Builds a query over the text lines found at `path`. Each line is
  decoded with decode-raw-ga and the query yields its !timestamp.
  Duplicate timestamps are kept, because :distinct is set to false."
  [path]
  (let [lines-tap (hfs-textline path)]
    (<- [!timestamp]
        (lines-tap !line)
        (decode-raw-ga !line :> !timestamp)
        (:distinct false))))
@sorenmacbeth
sorenmacbeth / index.html
Created July 13, 2011 02:56
slopegraph
<!DOCTYPE html>
<html>
<head>
<title>Slopegraph.js</title>
<style type="text/css">
</style>
<script src="http://github.com/mbostock/d3/raw/master/d3.min.js"></script>
<script type="text/javascript">
var dataset=[[10, 100], [430, 441], [103, 89], [228, 62], [393, 258], [250, 167], [290, 322], [480, 17], [94, 27], [116, 163]]
var widthOfCanvas = '500';
(defn landings [root]
(let [ybtag (select-fields (ybtag-tap root) ["!psn" "!ts" "!pvi" "!lo" "!ln" "!g_C" "!g_r" "!g_c" "!r"])
landing-sq (<- [!psn !datestr !g_C !g_r !g_c !kw !ref !lo !pvi ?c]
(ybtag !psn !ts !pvi !lo !ln !g_C !g_r !g_c !r)
(long->datestr !ts :> !datestr)
(parse-referrer !r :> !ref)
(extract-kw !r :> !kw)
(= !ln "true")
(c/count ?c))]
(<- [!psn !datestr !g_C !g_r !g_c !kw !r !lo ?s]
(defn do-join [root]
(let [pv (pageviews root)
lnd (landings root)
nv (new-visits root)
rv (return-visits root)
b (bounces root)]
(<- [?json]
(pv !pub !datestr !country !region !city !kw !ref !url !!pvs)
(lnd !pub !datestr !country !region !city !kw !ref !url !!lnds)
(nv !pub !datestr !country !region !city !kw !ref !url !!nvs)
(defn bounces [root]
(let [ybtag (select-fields (ybtag-tap root) ["!psn" "!pvi" "!lo" "!g_C" "!g_r" "!g_c" "!r" "!b"])
b-sq (<- [!pub !pv-id !b]
(ybtag !pub !pv-id _ _ _ _ _ !b)
(not= !b nil))
rest-sq (<- [!pub !pv-id !country !region !city !kw !ref !url]
(ybtag !pub !pv-id !url !country !region !city !r _)
(not= !r nil)
(expand-fields !r :> !kw !ref))
bounces-sq (<- [!pub !country !region !city !kw !ref !url !pv-id ?c]