Erik Bernhardsson erikbern

## gist:6312391
class BsddbTarget(luigi.LocalTarget):
    """Local target whose ``open`` hands back a bsddb hash database."""

    def open(self, mode):
        """Open the file at ``self.path`` via ``bsddb.hashopen`` using ``mode``."""
        # TODO: make this atomic!!
        handle = bsddb.hashopen(self.path, mode)
        return handle

class TCTarget(luigi.LocalTarget):
    open_tcs = {}

    def open(self, mode):
        if mode == 'r':
            if self.path not in TCTarget.open_tcs:

## gist:9628609
import luigi

# Here we import our own tasks, assuming they are arranged
# in a Python package (folder) named "components"
from components.SomeTaskA import SomeTaskA
from components.SomeTaskB import SomeTaskB
from components.SomeTaskC import SomeTaskC

# ------------------------------------------
# DEFINE THE MAIN WORKFLOW DEPENDENCY GRAPH

## gist:9811483
import os, shutil
import luigi
import sparkey
import random

class SparkeyTarget(luigi.Target):
    def __init__(self, path=None, spi='data.spi', spl='data.spl', writer_cls=sparkey.HashWriter, reader_cls=sparkey.HashReader):
        self.path = path
        self.spi_path = spi
        self.spl_path = spl

## gist:fc05e8cccd64dccde630
import random, time
import pylab, numpy

def method1(n):
    s = 1.0
    r = []
    for i in xrange(n):
        t = s * (1 - random.random() ** (1.0 / (n - i)))
        s -= t
        r.append(t)

## gist:8ddfbab33e1dae7014fe
// Adds up, over every element of the input, the number of pieces obtained
// by splitting that element on tab characters.
def tabCounter() = {
	implicit def input = getInput()
	val fieldCounts = input.map(line => line.split('\t').size)
	fieldCounts.reduce((left, right) => left + right)
}

// First task: requires MyTsvJob(buildId), writes to HdfsTarget("output"),
// and does its work with tabCounter.
val task = LuigiTask().requires(MyTsvJob(buildId)).output(HdfsTarget("output")).do(tabCounter)

// Second task: requires the first task, writes to HdfsTarget("output-2"),
// and does its work with somethingElse.
val otherTask = LuigiTask().requires(task).output(HdfsTarget("output-2")).do(somethingElse)

// Only the downstream task is run explicitly.
otherTask.run()  // schedule task and otherTask

## gist:89fe7e2c1a615084ee6d
// When there are at most _K indices, copy them one-to-one into m->children.
if (indices.size() <= (size_t)_K) {
  for (size_t i = 0; i < indices.size(); i++)
    m->children[i] = indices[i];
}

## gist:0f347c8d789402a09f2e
import subprocess, itertools, numpy
import matplotlib.pyplot as plt

# One-line summary plus --shortstat change counts for every non-merge commit
# reported by `git log` in the current working directory.
command = 'git log --shortstat --log-size --format=oneline --no-merges'.split()
# universal_newlines=True makes check_output return text instead of bytes, so
# splitting on '\n' works on Python 3 as well as Python 2.7 (bytes.split('\n')
# raises TypeError on Python 3).
data = subprocess.check_output(command, universal_newlines=True).split('\n')

def read_groups():
    buf = []
    for line in data:
        buf.append(line)

## gist:ba3456f836ccc9c044e8
/**
 * Turn an argument collection into a JSON string.
 *
 * @param {*} args - Value to serialize (e.g. an array or `arguments` object).
 * @returns {string} The result of `JSON.stringify(args)`.
 */
function serializeArgs(args) {
  var serialized = JSON.stringify(args);
  return serialized;
}

function unroll(f) {
  if (f._cache == undefined)
    f._cache = {};

  var f_new = function() {
    var key = serializeArgs(arguments);

## gist:4cff437097067142eca7
import numpy as np
import matplotlib.pyplot as plt

# Parameters of a 3-dimensional normal distribution: every component has
# mean 1 and every pairwise covariance is 0.5.
mean = np.array([1, 1, 1])
cov = np.array([
    [1, 0.5, 0.5],
    [0.5, 1, 0.5],
    [0.5, 0.5, 0.5],
])

# Draw 100,000 samples; each row of `people` is one sample.
people = np.random.multivariate_normal(mean=mean, cov=cov, size=100000)

# Score each row as a weighted sum of its three components.
criterion = np.array([0, 0.2, 1.0])
scores = people.dot(criterion)

## gist:1f32359164e994ef0dd2
----------------------------------------------------------------------------------------
n_samples:  1000 n_features:  100
LSHF index build time:  0.0258071422577
ANNOY index build time:  0.012866973877
FLANN index build time:  0.00251913070679
Panns index build time:  3.09596705437
LSHF average query time:  0.00537742614746 , Average accuracy:  0.536
ANNOY average query time:  0.000244197845459 , Average accuracy:  0.9954
FLANN average query time:  0.000175647735596 , Average accuracy:  0.5824
Panns average query time:  0.0499544477463 , Average accuracy:  0.8956
	class BsddbTarget(luigi.LocalTarget):
	def open(self, mode):
	return bsddb.hashopen(self.path, mode) # TODO: make this atomic!!

	class TCTarget(luigi.LocalTarget):
	open_tcs = {}

	def open(self, mode):
	if mode == 'r':
	if self.path not in TCTarget.open_tcs:
	import luigi

	# Here we are importing our own tasks, provided they are
	# arranged in a python module (folder) named "components"
	from components.SomeTaskA import SomeTaskA
	from components.SomeTaskB import SomeTaskB
	from components.SomeTaskC import SomeTaskC

	# ------------------------------------------
	# DEFINE THE MAIN WORKFLOW DEPENDENCY GRAPH
	import os, shutil
	import luigi
	import sparkey
	import random

	class SparkeyTarget(luigi.Target):
	def __init__(self, path=None, spi='data.spi', spl='data.spl', writer_cls=sparkey.HashWriter, reader_cls=sparkey.HashReader):
	self.path = path
	self.spi_path = spi
	self.spl_path = spl
	import random, time
	import pylab, numpy

	def method1(n):
	s = 1.0
	r = []
	for i in xrange(n):
	t = s * (1 - random.random() ** (1.0 / (n - i)))
	s -= t
	r.append(t)
	def tabCounter() = {
	implicit def input = getInput()
	input.map(_.split('\t').size).reduce(_ + _)
	}

	val task = LuigiTask().requires(MyTsvJob(buildId)).output(HdfsTarget("output")).do(tabCounter)

	val otherTask = LuigiTask().requires(task).output(HdfsTarget("output-2")).do(somethingElse)

	otherTask.run() // schedule task and otherTask
	if (indices.size() <= (size_t)_K) {
	for (size_t i = 0; i < indices.size(); i++)
	m->children[i] = indices[i];
	}
	import subprocess, itertools, numpy
	import matplotlib.pyplot as plt

	command = 'git log --shortstat --log-size --format=oneline --no-merges'.split()
	data = subprocess.check_output(command).split('\n')

	def read_groups():
	buf = []
	for line in data:
	buf.append(line)
	function serializeArgs(args) {
	return JSON.stringify(args);
	}

	function unroll(f) {
	if (f._cache == undefined)
	f._cache = {};

	var f_new = function() {
	var key = serializeArgs(arguments);
	import numpy as np
	import matplotlib.pyplot as plt

	mean = np.array([1, 1, 1])
	cov = np.array([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 0.5]])

	people = np.random.multivariate_normal(mean, cov, 100000)

	criterion = np.array([0, 0.2, 1.0])
	scores = np.dot(people, criterion)
	----------------------------------------------------------------------------------------
	n_samples: 1000 n_features: 100
	LSHF index build time: 0.0258071422577
	ANNOY index build time: 0.012866973877
	FLANN index build time: 0.00251913070679
	Panns index build time: 3.09596705437
	LSHF average query time: 0.00537742614746 , Average accuracy: 0.536
	ANNOY average query time: 0.000244197845459 , Average accuracy: 0.9954
	FLANN average query time: 0.000175647735596 , Average accuracy: 0.5824
	Panns average query time: 0.0499544477463 , Average accuracy: 0.8956