Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/python
# -*- coding: utf-8 -*-
from pprint import pprint, pformat
import random
def randomize(items):
randomized = []
while 0 < len(items):
idx = random.randint(0, len(items)-1)
@tomotaka
tomotaka / stream_text_processor.py
Last active August 29, 2015 13:56
cat test.txt | python ./stream_text_processor.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import sys
import hashlib
import gevent
import gevent.queue
import gevent.fileobject as gfo
@tomotaka
tomotaka / schwartz.java
Created June 23, 2014 07:18
schwartz.java
/**
 * Simple mutable pair holding an object together with an integer index.
 */
class Tuple {
    /** The wrapped object. */
    public Object obj;
    /** The integer index stored alongside {@link #obj}. */
    public int idx;

    /**
     * Creates a pair from the given object and index.
     *
     * @param obj the object to wrap
     * @param idx the index to store with it
     */
    Tuple(Object obj, int idx) {
        this.obj = obj;
        this.idx = idx;
    }
}
// Wrap every element of `objects` in a Tuple together with its position in
// iteration order.
// NOTE(review): the file name suggests this is the "decorate" step of a
// Schwartzian transform -- confirm against the rest of the snippet.
List<Tuple> tmp = new ArrayList<Tuple>();
// Running position of the element currently being wrapped.
int i = 0;
for (Object obj : objects) {
// Post-increment: the first element is stored with index 0.
tmp.add(new Tuple(obj, i++));
}
@tomotaka
tomotaka / concurrentcrawler.py
Last active August 29, 2015 14:13
concurrent crawler using gevent
import gevent.queue as gq
class ConcurrentCrawler(object):
def __init__(self, func=crawl_all, concurrency=10, q_max=100):
self._func = func
self._concurrency = concurrency
self._q_max = q_max
self._queue = gq.Queue(maxsize=self._q_max)
self._workers = None
@tomotaka
tomotaka / sqlitedict_vs_plyvel.py
Last active August 29, 2015 14:20
sqlitedict vs plyvel (LevelDB)
# -*- coding: utf-8 -*-
import time
import hashlib
import os
from contextlib import contextmanager
import shutil
import plyvel
from sqlitedict import SqliteDict
@tomotaka
tomotaka / calc_mean_bench.py
Last active August 29, 2015 14:26
mean calculation benchmark
import numpy as np
import random
import time
from contextlib import contextmanager
# Build the benchmark input: 300000 random integers, each drawn with
# random.randint(1, 100000000) (both bounds inclusive).
data = []
for i in xrange(300000):
data.append(random.randint(1, 100000000))
@tomotaka
tomotaka / ldbdump.py
Created September 17, 2015 07:34
JSON Lines dumper for LevelDB
# -*- coding: utf-8 -*-
import time
# pip install click plyvel simplejson
import click
import plyvel
import simplejson as json
@click.command()
@tomotaka
tomotaka / stdout_python_logging_sample
Created September 24, 2015 09:59
stdout python logging sample
# -*- coding: utf-8 -*-
import logging
import sys
def main():
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('[%(levelname)s] %(message)s (%(filename)s:%(lineno)s)')
@tomotaka
tomotaka / file_output_logging_sample.py
Created September 24, 2015 10:05
python file output logging sample
# -*- coding: utf-8 -*-
import logging
import sys
def main():
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
handler = logging.FileHandler('/tmp/aaa.log', mode='ab', encoding='utf-8')
formatter = logging.Formatter('[%(levelname)s] %(message)s (%(filename)s:%(lineno)s)')
@tomotaka
tomotaka / ex_resource_lock.rb
Created July 13, 2011 02:59
Exclusive resource locking for multithreaded programs
class ExResourceLock
def initialize(map)
@rmap = map
@mtx = Mutex.new
@cv = ConditionVariable.new
@mtx.synchronize do
@free_resources = @rmap.keys
@locked_resources = []
end