Skip to content

Instantly share code, notes, and snippets.

View mynameisfiber's full-sized avatar

Micha Gorelick mynameisfiber

View GitHub Profile
@mynameisfiber
mynameisfiber / jsonfiledata.py
Created March 15, 2016 16:11
One big file, many data splits
class JSONFileData(object):
def __init__(self, fd, split, num_samples=None):
assert sum(split.values()) == 1
self.fd = fd
self.split = split
self.num_samples = num_samples
self._parse_lines()
def _parse_lines(self):
if self.num_samples is None:
@mynameisfiber
mynameisfiber / context_timer.py
Created January 13, 2016 22:12
Times code within a contextmanager with useful stack information in output
import time
import contextlib
import inspect
_DEBUG = False
@contextlib.contextmanager
def Timer(name, debug=_DEBUG):
if not debug:
@mynameisfiber
mynameisfiber / thesession.py
Last active March 25, 2016 10:23
thesession.org ABC file scraper
#!/usr/bin/env python2.7
"""
Scrape thesession.org for all the yummy ABC files
"""
import os
import itertools as IT
from collections import deque
from operator import itemgetter
from urlparse import urljoin
@mynameisfiber
mynameisfiber / multigram_search.py
Last active August 29, 2015 14:10
MultigramSearch looks up instances of ngrams within a target text
#!/usr/bin/env python2.7
"""
>>> import multigram_search
>>> mgs = multigram_search.MultigramSearch([("a", "b", "c"), ("o", "c", "z"),
('z','y')])
>>> list(mgs.intersection("hello world you a b c foo".split(" ")))
[['a', 'b', 'c']]
$ # Fails on port 80
$ curl -i -N -H "Connection: Upgrade" -H "Upgrade: websocket" -H "Host: XXX.fastforwardlabs.com" -H "Origin: http://XXX.fastforwardlabs.com" http://XXX.fastforwardlabs.com/XXXX
HTTP/1.1 400 Bad Request
Server: nginx/1.4.6 (Ubuntu)
Date: Mon, 20 Oct 2014 23:22:48 GMT
Content-Type: text/html; charset=UTF-8
Content-Length: 34
Can "Upgrade" only to "WebSocket".
@mynameisfiber
mynameisfiber / neighborhoods.json
Created August 13, 2014 15:16
Centroids for neighborhoods in Manhattan
[
{
"name": "Lower East Side",
"center": [
-73.9836180698,
40.7155233255
]
},
{
"name": "Marble Hill",
# Why does this work
def split_chars(words, chars):
    """Split ``words`` on each separator in ``chars``, applied in order.

    ``words`` is first wrapped as a single piece; for every separator the
    current pieces are each split on it and the results are flattened.
    Returns a new list of the resulting pieces.
    """
    pieces = [words]
    for separator in chars:
        # Build the next round eagerly so each pass uses the separator
        # that is current at the time of the pass.
        split_round = []
        for piece in pieces:
            split_round.extend(piece.split(separator))
        pieces = split_round
    return pieces
# but this does not work
def split_chars_broken(words, chars):
itr = (words,)
@mynameisfiber
mynameisfiber / supernone.py
Created July 21, 2014 17:50
SuperNone -- The most None a None type can get.
from functools import total_ordering
@total_ordering
class SuperNone(object):
# general properties / methods
def __getattr__(self, *args, **kwargs):
return self
def __call__(self, *args, **kwargs):
return self
def __setattr__(self, *args, **kwargs):
@mynameisfiber
mynameisfiber / s4lru.py
Last active July 20, 2016 20:20
Short and Simple S4LRU Cache
from collections import OrderedDict
class S4LRU(object):
"""
Short and Simple [S4LRU][1] cache. Implemented by Micha Gorelick
(http://github.com/mynameisfiber) and released under the do whatever you
want license.
[1] http://www.cs.cornell.edu/~qhuang/papers/sosp_fbanalysis.pdf
"""
@mynameisfiber
mynameisfiber / shared_numpy.py
Created April 15, 2014 18:21
The pains of multiprocessing and copy-on-write
import numpy as np
import multiprocessing
import ctypes
import mmap
data_normal = np.zeros((4, 10), dtype=np.uint8)
# This shows the important piece in this whole mystery has been the MAP_SHARED
# memory flag. Normally, memory that is copied when the process os.fork()'s
# is set to copy-on-write which essentially makes each fork have its own