Skip to content

Instantly share code, notes, and snippets.

from redis import Redis
import simplejson
class Resque(object):
    """Dirt simple Resque client in Python. Can be used to create jobs.

    The Redis endpoint is read from the ``redis_server`` class attribute, a
    ``"host:port"`` string. Override it on the class or a subclass before
    instantiating to target a different server.
    """

    # Redis endpoint as "host:port"; the ":port" part may be omitted.
    redis_server = 'localhost:6379'
    # Port used when ``redis_server`` carries no explicit port.
    _default_port = 6379

    def __init__(self):
        """Create the Redis client for the endpoint in ``redis_server``.

        Raises:
            ValueError: if the port part of ``redis_server`` is not an integer.
        """
        host, port = self._parse_server(self.redis_server)
        self.redis = Redis(host=host, port=port)

    @classmethod
    def _parse_server(cls, server):
        """Split a ``"host[:port]"`` string into a ``(host, port)`` tuple.

        Splitting happens on the LAST colon so hosts that themselves contain
        colons do not break unpacking; a string without any colon gets the
        default port instead of raising.
        """
        host, sep, port = server.rpartition(':')
        if not sep:
            # No colon at all: the whole string is the host name.
            return server, cls._default_port
        return host, int(port)
View gist:416502
/* Computes one tunkrank_score value: 1.0 plus the sum, over every
   twitter_users row whose twitter_id appears as follower_twitter_id for the
   target user_twitter_id, of (1 + p * tunkrank_score) / (1 + num_friends).
   The p and twitter_id placeholders are substituted into the text by the
   host language rather than bound as parameters.
   NOTE(review): confirm both values are numeric before interpolation. */
SELECT 1.0 + SUM((1.0 + #{p} * tunkrank_score) / (1.0 + num_friends)) AS tunkrank_score
FROM twitter_users
INNER JOIN twitter_id_follows ON (twitter_users.twitter_id = twitter_id_follows.follower_twitter_id)
WHERE twitter_id_follows.user_twitter_id = #{twitter_id};
View tunkrank.rb
class TwitterUser
def calculate_tunkrank(p=0.05)
self.followers.inject(0.0) do |sum, follower|
sum + ((1.0 + (p * follower.tunkrank_score)) / (1.0 + follower.num_friends))
sorenmacbeth / gist:827971
Created Feb 15, 2011 — forked from michaelmontano/gist:535794
updated to whirr-0.3.0
View gist:827971
diff -Naur whirr-0.3.0-incubating/contrib/python/src/py/hadoop/cloud/ whirr-0.3.0-incubating-backtype/contrib/python/src/py/hadoop/cloud/
--- whirr-0.3.0-incubating/contrib/python/src/py/hadoop/cloud/ 2011-01-15 23:03:44.000000000 -0800
+++ whirr-0.3.0-incubating-backtype/contrib/python/src/py/hadoop/cloud/ 2011-02-15 11:51:49.000000000 -0800
@@ -296,7 +296,7 @@
opt.get('availability_zone'), opt.get('user_packages'),
opt.get('auto_shutdown'), opt.get('env'),
- opt.get('security_group'))
+ opt.get('security_group'), opt.get('spot_price'))
service.launch_master(template, config_dir, opt.get('client_cidr'))
View globhfs.clj
(ns gist.globhfs
(:import [cascading.tap GlobHfs]))
;; ### Bucket to Cluster
;;; To get tuples back out of our directory structure on S3, we employ
;; Cascading's GlobHfs tap, along with an
;; interface tailored for datasets stored in the MODIS sinusoidal
;; projection. For details on the globbing syntax, see the documentation
;; for Hadoop's `FileSystem.globStatus`.
View gist:1030408
;; Map operation: parse the `json` string and emit the value stored under
;; :timestamp as a single-field tuple. The `true` flag passed to the parser
;; presumably keywordizes keys, which the :timestamp lookup relies on.
(defmapop decode-raw-ga [json]
  (-> json
      (clj-json.core/parse-string true)
      :timestamp
      vector))
(defn glob-json-data
  "Returns a query over the text lines found at `path` that yields the
  !timestamp decoded from each line by `decode-raw-ga`. Duplicate
  timestamps are kept (:distinct false)."
  [path]
  (let [lines (hfs-textline path)]
    (<- [!timestamp]
        (lines !raw)
        (decode-raw-ga !raw :> !timestamp)
        (:distinct false))))
View index.html
<!DOCTYPE html>
<style type="text/css">
<script src=""></script>
<script type="text/javascript">
var dataset=[[10, 100], [430, 441], [103, 89], [228, 62], [393, 258], [250, 167], [290, 322], [480, 17], [94, 27], [116, 163]]
var widthOfCanvas = '500';
View gist:1093907
(defn landings [root]
(let [ybtag (select-fields (ybtag-tap root) ["!psn" "!ts" "!pvi" "!lo" "!ln" "!g_C" "!g_r" "!g_c" "!r"])
landing-sq (<- [!psn !datestr !g_C !g_r !g_c !kw !ref !lo !pvi ?c]
(ybtag !psn !ts !pvi !lo !ln !g_C !g_r !g_c !r)
(long->datestr !ts :> !datestr)
(parse-referrer !r :> !ref)
(extract-kw !r :> !kw)
(= !ln "true")
(c/count ?c))]
(<- [!psn !datestr !g_C !g_r !g_c !kw !r !lo ?s]
View gist:1112457
(defn do-join [root]
(let [pv (pageviews root)
lnd (landings root)
nv (new-visits root)
rv (return-visits root)
b (bounces root)]
(<- [?json]
(pv !pub !datestr !country !region !city !kw !ref !url !!pvs)
(lnd !pub !datestr !country !region !city !kw !ref !url !!lnds)
(nv !pub !datestr !country !region !city !kw !ref !url !!nvs)
View shaper.clj
(defn bounces [root]
(let [ybtag (select-fields (ybtag-tap root) ["!psn" "!pvi" "!lo" "!g_C" "!g_r" "!g_c" "!r" "!b"])
b-sq (<- [!pub !pv-id !b]
(ybtag !pub !pv-id _ _ _ _ _ !b)
(not= !b nil))
rest-sq (<- [!pub !pv-id !country !region !city !kw !ref !url]
(ybtag !pub !pv-id !url !country !region !city !r _)
(not= !r nil)
(expand-fields !r :> !kw !ref))
bounces-sq (<- [!pub !country !region !city !kw !ref !url !pv-id ?c]