Skip to content

Instantly share code, notes, and snippets.

@sing1ee
Created July 9, 2012 03:30
Show Gist options
  • Save sing1ee/3074072 to your computer and use it in GitHub Desktop.
Save sing1ee/3074072 to your computer and use it in GitHub Desktop.
read Cassandra index file with workerpool
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import struct
import os
import redis
import mmh3
import sys
import workerpool
import logging
from os.path import getsize
def read_short_length(file=None):
    """Read a 2-byte big-endian unsigned integer from *file*.

    The first byte read is the high-order byte and the second the
    low-order byte, so the result is in the range 0..65535.

    Args:
        file: A binary file-like object positioned at the length prefix.

    Returns:
        The decoded length as an int. The file position advances by the
        number of bytes actually consumed (2 on success).

    Raises:
        struct.error: If fewer than 2 bytes remain in *file*.
    """
    # '>H' = big-endian unsigned short; one unpack replaces two signed
    # single-byte unpacks followed by manual masking and shifting.
    return struct.unpack('>H', file.read(2))[0]
def read(file=None, len=16):
    """Read up to *len* bytes from *file* and return them.

    Args:
        file: A file-like object supporting ``read(n)``.
        len: Maximum number of bytes to read (default 16). The name
            shadows the builtin inside this function only; it is kept
            because callers may pass it by keyword.

    Returns:
        Whatever ``file.read(len)`` returns; this is shorter than *len*
        when the end of the file is reached first.
    """
    return file.read(len)
def skip(file=None, bytes_num=0):
    """Advance the position of *file* by *bytes_num* bytes.

    Args:
        file: A seekable file-like object.
        bytes_num: Number of bytes to move forward from the current
            position (default 0, which leaves the position unchanged).
    """
    # Seek to an absolute offset computed from the current position.
    file.seek(file.tell() + bytes_num)
# Redis servers the keys are spread across; worker() picks one per key.
hosts = ["10.1.1.37", "10.1.1.38", "10.1.1.39", "10.1.1.40", "10.1.1.41", "10.1.1.42", "10.1.1.43"]
# One client per host, in the same order as `hosts`.
clients = [redis.Redis(host=h) for h in hosts]
#dirs = ['/sata1/var_lib/disk1/WebSearch', '/sata2/var_lib/disk0/WebSearch']
# Shared pool of 8 workers used by process() to handle index files.
pool = workerpool.WorkerPool(size=8)
# Log file name comes from the second command-line argument and is
# resolved against the current working directory.
logging.basicConfig(filename=os.path.join(os.getcwd(), sys.argv[2]), level=logging.DEBUG)
def process():
    """Find index files in the input directory and process them in the pool.

    The directory is taken from the first command-line argument. Every
    entry whose name ends with ``-Index.db`` and does not contain ``tmp``
    is handed to ``worker`` through the shared ``pool``; the function
    returns once the pool has been shut down and all jobs have finished.
    """
    dirs = [sys.argv[1]]
    ifiles = []
    for d in dirs:
        # Parenthesised form prints the same text on Python 2 and 3.
        print(d)
        for fname in os.listdir(d):
            # Test the file name only: matching against the full path
            # would skip every file when the directory path itself
            # happens to contain 'tmp'.
            if 'tmp' in fname:
                continue
            if fname.endswith('-Index.db'):
                ifiles.append(os.path.join(d, fname))
                logging.debug('add' + fname)
    pool.map(worker, ifiles)
    # Signal the workers to stop once the queue drains, then block
    # until they have all finished.
    pool.shutdown()
    pool.wait()
def worker(file_name):
    """Read every key from one index file and store it in Redis.

    The file is read as a sequence of entries, each consisting of a
    2-byte length, a key of that many bytes, and 8 further bytes that are
    skipped (presumably the entry's position in the data file -- confirm
    against the index format). Each key is stored with the placeholder
    value ``'z'`` on the client selected by its MurmurHash3 hash.

    Args:
        file_name: Path of the index file to read.
    """
    count = 0
    file_size = getsize(file_name)
    # Derive the shard count from the client list so that changing
    # `hosts` cannot leave a stale modulus behind.
    shard_count = len(clients)
    # 'rb': the file holds binary data and must not go through text-mode
    # translation; `with` guarantees the handle is closed on any exit.
    with open(file_name, 'rb') as index_file:
        while index_file.tell() != file_size:
            key = read(index_file, read_short_length(index_file))
            idx = mmh3.hash(key) % shard_count
            clients[idx].set(key, 'z')
            if count % 100000 == 0:
                # Progress sample: read the key back and log it. Lazy
                # %-args avoid a TypeError if the read-back is None.
                logging.debug('%s %s %s %s', key, clients[idx].get(key), count, idx)
            count += 1
            # Skip the 8 bytes that follow each key.
            skip(index_file, 8)
    logging.debug(file_name + 'ok')
if __name__ == '__main__':
    # Run the import only when executed as a script, not when imported.
    process()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment