Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python
#
# Python code for http://www.cloudera.com/blog/2009/06/17/analyzing-apache-logs-with-piganalyzing-apache-logs-with-pig/
import sys
import math
def rescale(values, low=0, high=4095):
"""Linearly rescales values to be strictly between low and high."""
maxval = max(values)
import random
class Markov(object):
def __init__(self, open_file):
self.cache = {}
self.open_file = open_file
self.words = self.file_to_words()
self.word_size = len(self.words)
self.database()
(defn histogram
  "Build a map from bucket to occurrence count for the values read from
  `filename`.

  The first element returned by `read-lines` is dropped, then every second
  element of the remainder is kept (the 2nd, 4th, 6th, ... element overall).
  Each kept value is divided by 10 and floored with Math/floor; the
  resulting double is used as the bucket key.

  NOTE(review): each value is passed straight to `/`, so this assumes
  `read-lines` yields numbers rather than raw strings -- confirm against
  its definition."
  [filename]
  (let [bucket-of (fn [value] (Math/floor (/ value 10)))
        ;; Bump the counter for the bucket `value` falls into; buckets that
        ;; have not been seen yet start from 0.
        tally (fn [counts value]
                (let [bucket (bucket-of value)
                      seen (get counts bucket 0)]
                  (assoc counts bucket (inc seen))))]
    (->> (read-lines filename)
         (drop 1)
         (take-nth 2)
         (reduce tally {}))))
@pib
pib / sexp.py
Created November 23, 2009 07:57
A simple Python s-expression parser.
from string import whitespace
atom_end = set('()"\'') | set(whitespace)
def parse(sexp):
stack, i, length = [[]], 0, len(sexp)
while i < length:
c = sexp[i]
print c, stack
# Power law (log tail) distribution
# Copyright(C) 2010 Salvatore Sanfilippo
# this code is under the public domain
# min and max are both inclusive
# n is the distribution power: the higher, the more biased
def powerlaw(min,max,n)
max += 1
pl = ((max**(n+1) - min**(n+1))*rand() + min**(n+1))**(1.0/(n+1))
(max-1-pl.to_i)+min
# Config for Nginx to act as a front-end for Riak
# The main goal is to proxy all GETs directly to Riak, and disallow anything else (POST, PUT, etc)
# Also, disallow use of the map/reduce query links (i.e. /riak/bucket/key/_,_,_)
# Config is in /etc/nginx/sites-available/default or somewhere like that
# Set up load-balancing to send requests to all nodes in the Riak cluster
# Replace these IPs/ports with the locations of your Riak nodes
upstream riak_hosts {
server 127.0.0.1:8098;
import gevent
from thrift.server.TServer import TServer
# XXX Hackish, but should be safe: monkey patch gevent socket support into
# Thrift. Overall I think this is cleaner than reimplementing all of TSocket.
from thrift.transport import TSocket; TSocket.socket = gevent.socket
from thrift.transport.TTransport import TTransportException
class TGEventServer(TServer):
"""Gevent socket server."""
anonymous
anonymous / notifo-alert
Created April 24, 2010 00:54
Sample code for sending self notifications to notifo
#!/usr/bin/env python
# This code is part of the public domain
# User settings: replace both placeholders with your own notifo credentials.
USERNAME = "__username__goes__here__"
API_SECRET = "__secret__api__key__goes__here__"
# notifo send_notification endpoint. USERNAME and API_SECRET are embedded in
# the URL as the user:password part, so the resulting string contains the
# secret -- avoid printing or logging it.
URL = "https://%s:%s@api.notifo.com/v1/send_notification" % (USERNAME, API_SECRET)
@rcrowley
rcrowley / modpython.conf
Created June 22, 2010 04:39
Redis in Ganglia
# Registers the shared object at `path` as a gmond module named
# "python_module".
# NOTE(review): `params` is presumably the directory searched for Python
# metric modules -- confirm against the Ganglia documentation.
modules {
module {
name = "python_module"
path = "/usr/lib/ganglia/modpython.so"
params = "/usr/lib/ganglia/python_modules"
}
}
# Pull in every *.pyconf file found under /etc/ganglia/conf.d.
include ('/etc/ganglia/conf.d/*.pyconf')
; supervisord program section that runs beerscore_bot.php under the PHP CLI.
[program:beerscore]
; The script path is relative; `directory` below names the folder it lives in.
command=/usr/local/bin/php beerscore_bot.php
numprocs=1
directory=/home/sean/findpint/twitterbot
; Standard output of the process is written to this file.
stdout_logfile=/home/sean/findpint/twitterbot/supervisord.log
; Credentials are handed to the process as environment variables (the
; password value is redacted in this copy). Anyone who can read this file
; can read them, so keep its permissions tight.
environment=BEERSCORE_USER=beerscore,BEERSCORE_PASS=[redacted]
autostart=true
autorestart=true
; Account the process runs as.
user=beerscore