Skip to content

Instantly share code, notes, and snippets.

View bnyeggen's full-sized avatar

Bryce Nyeggen bnyeggen

View GitHub Profile
@bnyeggen
bnyeggen / raid_mtbf.py
Created July 11, 2011 22:54
A RAID MTBF calculator
#redundancy is the max number of survivable failures, so eg 1 for RAID5
#mtbf_array is an array of either actual mean-time-between-failures, or a nested RAID array
# RAID([100]*7,2) #7 disk RAID 6
# RAID([RAID([100]*3,1),RAID([1000]*3,1)],0) # RAID 50, 2 arrays of 3
# RAID([100,100,50,50],1) #RAID 5 with varying reliabilities
from random import random
class RAID(object):
@bnyeggen
bnyeggen / multiprocess_with_instance_methods.py
Created July 16, 2011 14:17
Example showing how to use instance methods with the multiprocessing module
from multiprocessing import Pool
from functools import partial
def _pickle_method(method):
func_name = method.im_func.__name__
obj = method.im_self
cls = method.im_class
if func_name.startswith('__') and not func_name.endswith('__'): #deal with mangled names
cls_name = cls.__name__.lstrip('_')
func_name = '_' + cls_name + func_name
@bnyeggen
bnyeggen / clojure_hive_jdbc.clj
Created December 13, 2011 15:12
Clojure to Hive via JDBC
(comment Add [org.clojure/java.jdbc "0.1.1"] to project dependencies)
(ns myproject.core
(:use [clojure.java.jdbc :only [with-connection, with-query-results]]))
(let [db-host "MyHost"
db-port 10000
db-name "default"]
(def db {:classname "org.apache.hadoop.hive.jdbc.HiveDriver" ; must be in classpath
:subname (str "//" db-host ":" db-port "/" db-name)
@bnyeggen
bnyeggen / clojure_hive_thrift.clj
Created December 13, 2011 15:28
Clojure to Hive via Thrift
(comment You will want just about everything in your hive/lib dir included in your Classpath)
(ns myproj.core
(:import [org.apache.hadoop.hive.service HiveClient]
[org.apache.thrift.transport TSocket]
[org.apache.thrift.protocol TBinaryProtocol]))
(defn send-hive
"Creates a new socket and Hive client connection, runs the query, pulls the result, and closes the connection.
Eventually modify to split and parse according to schema of result.
@bnyeggen
bnyeggen / load-csv.clj
Created December 29, 2011 14:36
Wrong and right way to parse a file in Clojure
;dummy.csv is of format "1,2,3\n4,5,6\n"
;NOTE(review): `reader` and `read-csv` are not defined in this snippet --
;presumably clojure.java.io/reader and a CSV library's read-csv; confirm.
;
;Wrong way #1: `for` builds a lazy seq, so the body of with-open returns
;before any row is pulled from r; with-open then closes r, and realizing
;the seq afterwards reads from a closed reader.
;fails since everything is lazy (but works if you do something that forces resolution)
(with-open [r (reader "file:///home/brycen/dummy.csv")]
(for [lines (read-csv r)]
(zipmap [:x :y :z] lines)))
;Wrong way #2: the reader is created inline and never bound to a name, so
;nothing in this form ever closes it explicitly.
;Doesn't close "properly" (but works most of the time)
(for [lines (read-csv (reader "file:///home/brycen/dummy.csv"))]
(zipmap [:x :y :z] lines))
@bnyeggen
bnyeggen / flexigroup.clj
Created February 27, 2012 18:04
More flexible group-by
(defn flexigroup
"Like group-by, but allows arbitrary calculation of keys and values from
source coll, (for instance, to roll up distinct particular elements of a
vector by distinct other parts) and arbitrary combinations of old and new
vectors (for instance, to perform an efficient online count).
(flexigroup identity identity conj [] coll) == group-by
(flexigroup identity identity (fn [a b] (inc a)) 0) == online count
(flexigroup #(subvec % 0 1) #(subvec % 1) conj #{} coll) == all the
distinct rests of the vectors, grouped by the first element"
[key-f val-f combine-f init-v coll]
@bnyeggen
bnyeggen / switch.clj
Created March 12, 2012 15:22
Switch values of two refs/atoms/agents
(defn switch-refs
  "Swaps the values held by refs a and b, returning the value that was
  originally in a (now stored in b).

  Must be called in a transaction (inside dosync); ref-set throws
  IllegalStateException otherwise.

  Uses ref-set rather than commute. A swap is not a commutative update:
  the value written to each ref is the snapshot value read from the other
  one. With commute, a concurrent transaction could change a or b between
  the reads below and the commit, and the stale snapshot would still be
  written. ref-set makes this transaction conflict with such a change and
  retry, so the swap is always consistent.

  Note: ref-set cannot be used on a ref that was already commuted earlier
  in the same transaction."
  [a b]
  (let [av @a
        bv @b]
    (ref-set a bv)
    (ref-set b av)))
(defn switch!
"Blocks on both sequentially, nontransactional"
@bnyeggen
bnyeggen / intern.clj
Created April 9, 2012 21:26
Interning with Clojure
(defn intern-map
  "Returns a map with the same keys as m in which every value has been
  replaced by one canonical instance of that value, so that equal values
  share a single object."
  [m]
  (let [canonical (set (vals m))          ; one representative per distinct value
        dedupe    (fn [[k v]] [k (canonical v)])]
    ;; Looking a value up in the set returns the instance stored in the set.
    (into {} (map dedupe m))))
(defn make-duped-hashmap []
(into {}
(for [k (range 10000000)]
@bnyeggen
bnyeggen / with-intern.clj
Created April 10, 2012 01:53
More interning with Clojure
(defmacro with-interns
"interns => [intern1 intern2...]
Evaluates body while making available many functions, bound to the
symbols in interns. Each fn, when called, returns a deduped reference to
its argument. The deduplication is with respect to any previously-called
arguments to that fn.
(with-interns [intern]
(into {}
(for [k (range 100000)]
@bnyeggen
bnyeggen / distinctify.clj
Created August 31, 2012 13:17
Three ways of distinctifying a sequence and returning a vec
(defn unique-vec1
  "Dedupes s by building a set from it, then pours that set into a vector.
  Element order is whatever order the set yields."
  [s]
  (let [distinct-elems (set s)]
    (into [] distinct-elems)))
(defn unique-vec2
"Conj into transient set and vectorize"
[s]
(loop [remaining s
seen (transient #{})]