Skip to content

Instantly share code, notes, and snippets.

View dlecocq's full-sized avatar

Dan Lecocq dlecocq

  • Albuquerque, NM
View GitHub Profile
from __future__ import print_function
# Imagine we're on a grid at point `(x, y)`. If we can only move down or left, one grid
# point at a time, how many unique paths can be made from `(x, y)` to the origin?
def paths(x, y):
    """Count the unique paths from grid point `(x, y)` to the origin.

    Only two moves are allowed, each exactly one grid point long: left
    (decrementing `x`) and down (decrementing `y`). Every path is therefore
    some ordering of `x` left-moves and `y` down-moves, so the count is the
    binomial coefficient C(x + y, x).

    Args:
        x: Non-negative integer horizontal coordinate of the start point.
        y: Non-negative integer vertical coordinate of the start point.

    Returns:
        The number of distinct paths as an int. The origin itself has one
        (empty) path. A start point with a negative coordinate can never
        reach the origin by moving only left or down, so it yields 0.

    Example:
        >>> paths(5, 3)
        56
    """
    if x < 0 or y < 0:
        return 0

    total_moves = x + y
    # C(n, k) == C(n, n - k); iterate over the smaller side to do less work.
    fewer = min(x, y)

    count = 1
    for step in range(1, fewer + 1):
        # After this step `count` equals C(total_moves - fewer + step, step),
        # which is an integer, so the floor division is always exact.
        count = count * (total_moves - fewer + step) // step
    return count
# As an example, this should be 56
#! /usr/bin/env python
from gevent import monkey
monkey.patch_all()
import time
import ujson as json
from functools import wraps
import gevent
@dlecocq
dlecocq / evince-bug.py
Created July 1, 2014 17:04
Infinite Loops in simhash-py, simhash-cpp
def duplicates(path):
'''Debug the duplicate detection from custom crawl'''
from simhash import Corpus
import simplejson as json
with open(path) as fin:
simhashes = json.load(fin)
corpus = Corpus(7, 5)
corpus.insert_bulk(simhashes)
for simhash in simhashes:
@dlecocq
dlecocq / queues.rb
Created January 21, 2014 22:32
This behaves differently in JRuby and MRI
#! /usr/bin/env ruby
@queues = 10.times.map { Array.new }
# An enumerator that yields `nil` if none of the queues has any items, but
# otherwise round-robins across them.
@foo = Enumerator.new do |enum|
loop do
found = false
@queues.each do |queue|
PATH
remote: .
specs:
qless (0.9.2)
redis (>= 2.2)
GEM
remote: http://rubygems.org/
specs:
addressable (2.3.4)
@dlecocq
dlecocq / 0-dependencies.sh
Last active December 17, 2015 09:18
Elasticsearch in Dallas
# Packages
sudo apt-get install -y make gcc g++ git rubygems screen ncdu iptraf unzip openjdk-7-jre-headless
# System-wide file descriptor limit
echo 'fs.file-max = 100000' | sudo tee -a /etc/sysctl.conf
# And unlimited memory locking
echo "dan soft memlock unlimited" | sudo tee -a /etc/security/limits.conf
echo "dan hard memlock unlimited" | sudo tee -a /etc/security/limits.conf
# Don't limit the number of processes
@dlecocq
dlecocq / 01-redis.sh
Last active December 14, 2015 12:09
Qless Setup
# You'll need a few tools to get you going
sudo apt-get update
sudo apt-get install -y make g++
# If you need redis, this should get you going
export REDIS_VERSION=2.6.7
cd && curl -O http://redis.googlecode.com/files/redis-$REDIS_VERSION.tar.gz
tar xf redis-$REDIS_VERSION.tar.gz
cd redis-$REDIS_VERSION
make && sudo make install
@dlecocq
dlecocq / topsy.txt
Last active December 11, 2015 00:09
Topsy hostname distribution
# Domain Portion Cumulative Count
ask.fm 0.12963 0.12963 107372
www.youtube.com 0.04967 0.17931 41142
is.gd 0.03277 0.21207 27140
tmi.me 0.01701 0.22909 14090
www.amazon.co.jp 0.01187 0.24096 9832
weheartit.com 0.00970 0.25066 8033
soundcloud.com 0.00744 0.25809 6161
pinterest.com 0.00721 0.26531 5975
adf.ly 0.00696 0.27226 5761
# Environment
#
# export HOSTNAME=...
# export CLUSTER=...
# export RACK=...
# export ESDIR=
# export AWS_ACCESS_ID=...
# export AWS_SECRET_KEY=...
# Packages
@dlecocq
dlecocq / resolve.py
Created November 21, 2012 18:59
Bulk Asynchronous DNS Resolution
import struct
from gevent import pool
from gevent.dns import resolve_ipv4
# Should be a file with one hostname per line
with open('hosts') as fin:
urls = fin.read().split('\n')
def func(host):
try: