Last active
December 21, 2016 04:04
-
-
Save kevinjqiu/dd461b36a6f1d6d755d7a317d8f98b75 to your computer and use it in GitHub Desktop.
couchdb indexing benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the image described by Dockerfile.python and tag it couchdb-python.
docker build \
    --file Dockerfile.python \
    --tag couchdb-python \
    .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Start the couchdb-python image in the background, publishing container port
# 5984 on host port 9999 and mounting ./data at /usr/local/var/lib/couchdb
# inside the container.
# The volume argument is quoted so that a working directory containing spaces
# still reaches docker as a single argument.
docker run -p 9999:5984 -d -v "$(pwd)/data:/usr/local/var/lib/couchdb" couchdb-python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set the "python" query server to the couchpy script run by /usr/bin/python.
curl --request PUT \
     --header "Content-Type: application/json" \
     --data '"/usr/bin/python /usr/local/bin/couchpy"' \
     http://localhost:9999/_config/query_servers/python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ID (or name) of the running container; fill this in before running,
# e.g. from the output of `docker ps`.
containerid=

# Install python-dev and simplejson inside the container. The ${...:?} form
# aborts with a clear message while containerid is still empty instead of
# handing docker a malformed command line.
docker exec "${containerid:?set containerid to the running container ID}" bash -c "apt-get install -y python-dev"
docker exec "${containerid:?set containerid to the running container ID}" bash -c "pip install simplejson"

# Point the "python" query server at couchpy using simplejson as its JSON module.
curl -XPUT -H"Content-Type: application/json" http://localhost:9999/_config/query_servers/python -d'"/usr/bin/python /usr/local/bin/couchpy --json-module=simplejson"'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set the "python" query server to the couchpy script run by /opt/pypy/bin/pypy.
curl --request PUT \
     --header "Content-Type: application/json" \
     --data '"/opt/pypy/bin/pypy /opt/pypy/bin/couchpy"' \
     http://localhost:9999/_config/query_servers/python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set the "erlang" entry under native_query_servers to couch_native_process.
curl --request PUT \
     --header "Content-Type:application/json" \
     --data '"{couch_native_process, start_link, []}"' \
     http://localhost:9999/_config/native_query_servers/erlang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
See the blogpost |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cps.py - calculates the changes per second of an index
# active tasks: | |
# [{"pid":"<0.19099.4>","changes_done":3333,"database":"test","design_document":"_design/scoresByMonthJS","progress":3,"started_on":1482254716,"total_changes":100001,"type":"indexer","updated_on":1482254717}] | |
import sys | |
import requests | |
import time | |
DB_URL = 'http://localhost:9999' | |
if __name__ == '__main__':
    # Poll the _active_tasks endpoint about once per second and print the
    # indexing throughput (changes per second) of the design document named
    # on the command line. Ctrl+C stops polling and prints the average of
    # all samples collected.
    if len(sys.argv) < 2:
        sys.exit('usage: {} <design_document>'.format(sys.argv[0]))
    design_doc = sys.argv[1]
    print('design doc: {}'.format(design_doc))
    print('Press Ctrl+C to exit')

    all_changes_per_sec = []
    try:
        while True:
            response = requests.get('{}/_active_tasks'.format(DB_URL))
            tasks = [
                task for task in response.json()
                if task.get('design_document') == design_doc
            ]
            # Only take a sample when exactly one task matches the design
            # document.
            if len(tasks) == 1:
                task = tasks[0]
                changes_done = int(task['changes_done'])
                elapsed = int(task['updated_on']) - int(task['started_on'])
                # A task whose timestamps are equal has no measurable
                # elapsed time yet; skip it to avoid dividing by zero.
                if elapsed != 0:
                    changes_per_sec = 1.0 * changes_done / elapsed
                    all_changes_per_sec.append(changes_per_sec)
                    print('c/s = {:.2f}'.format(changes_per_sec))
            # Sleep on every iteration, including the ones that produced no
            # sample, so the server is never polled in a tight loop.
            time.sleep(1)
    except KeyboardInterrupt:
        if all_changes_per_sec:
            average = 1.0 * sum(all_changes_per_sec) / len(all_changes_per_sec)
            print('average = {:.2f}'.format(average))
        else:
            # Interrupted before any sample was taken: there is no average.
            print('no samples collected')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image based on couchdb with Python, pip and the couchdb pip package added.
FROM couchdb

# Refresh the apt package index, then install the Python interpreter and pip.
RUN apt-get update -yqq \
 && apt-get install -y \
        python \
        python-pip

# Install the couchdb package with pip.
RUN pip install couchdb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
import sys | |
import requests | |
import random | |
import datetime | |
from multiprocessing import Pool | |
DB_URL = 'http://localhost:9999/test' | |
def random_name(length=6):
    """Return a random string made of ASCII letters.

    Args:
        length: Number of characters to generate. Defaults to 6.

    Returns:
        A string of ``length`` characters, each drawn independently from
        ``string.ascii_letters``. An empty string when ``length`` is 0.
    """
    return ''.join(random.choice(string.ascii_letters) for _ in range(length))
def random_date():
    """Return a random date from roughly the last year as 'YYYY-MM-DD'.

    The result is today's date minus a uniformly chosen whole number of
    days between 0 and 366, both ends inclusive.
    """
    days_back = random.randint(0, 366)
    picked = datetime.date.today() - datetime.timedelta(days=days_back)
    return picked.strftime('%Y-%m-%d')
def generate_doc(i):
    """Build one random score document and POST it to ``DB_URL``.

    Args:
        i: Index of the document being generated. A progress line is
            printed whenever ``i`` is a multiple of 5000 (including 0).

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # Random values are drawn in the order date, name, score.
    created_at = random_date()
    username = random_name()
    score = random.randint(0, 5000)

    payload = {
        'metadata': {'docType': 'score', 'createdAt': created_at},
        'username': username,
        'score': score,
    }
    requests.post(DB_URL, json=payload).raise_for_status()

    if not i % 5000:
        print('{} Done'.format(i))
if __name__ == '__main__':
    # Recreate the test database and fill it with the requested number of
    # random documents using 50 worker processes, then print the database's
    # doc_count and disk_size.
    if len(sys.argv) < 2:
        sys.exit('usage: {} <num_of_docs>'.format(sys.argv[0]))
    num_of_docs = int(sys.argv[1])

    # Delete the database if it exists already. The response is ignored on
    # purpose: the database may simply not exist yet.
    requests.delete(DB_URL)
    # Create the test database; stop here if that fails rather than letting
    # every worker fail against a missing database.
    requests.put(DB_URL).raise_for_status()

    print('Generating {} docs'.format(num_of_docs))
    pool = Pool(50)
    try:
        pool.map(generate_doc, range(num_of_docs))
    finally:
        # map() has either returned or raised by now, so stop the workers and
        # wait for them to exit before reporting.
        pool.terminate()
        pool.join()
        stats = requests.get(DB_URL).json()
        print('doc_count: {}'.format(stats['doc_count']))
        print('disk_size: {}'.format(stats['disk_size']))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment