Skip to content

Instantly share code, notes, and snippets.

@kevinjqiu
Last active December 21, 2016 04:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kevinjqiu/dd461b36a6f1d6d755d7a317d8f98b75 to your computer and use it in GitHub Desktop.
Save kevinjqiu/dd461b36a6f1d6d755d7a317d8f98b75 to your computer and use it in GitHub Desktop.
couchdb indexing benchmark
docker build -f Dockerfile.python -t couchdb-python .
docker run -p 9999:5984 -d -v $(pwd)/data:/usr/local/var/lib/couchdb couchdb-python
curl -XPUT -H"Content-Type: application/json" http://localhost:9999/_config/query_servers/python -d'"/usr/bin/python /usr/local/bin/couchpy"'
containerid=$(docker ps -q --filter ancestor=couchdb-python)  # ID of the container started above
docker exec $containerid bash -c "apt-get install -y python-dev"
docker exec $containerid bash -c "pip install simplejson"
curl -XPUT -H"Content-Type: application/json" http://localhost:9999/_config/query_servers/python -d'"/usr/bin/python /usr/local/bin/couchpy --json-module=simplejson"'
curl -XPUT -H"Content-Type: application/json" http://localhost:9999/_config/query_servers/python -d'"/opt/pypy/bin/pypy /opt/pypy/bin/couchpy"'
curl -XPUT -H "Content-Type:application/json" http://localhost:9999/_config/native_query_servers/erlang -d'"{couch_native_process, start_link, []}"'
See the blogpost
# cps.py - calculates the changes per second of an index
# active tasks:
# [{"pid":"<0.19099.4>","changes_done":3333,"database":"test","design_document":"_design/scoresByMonthJS","progress":3,"started_on":1482254716,"total_changes":100001,"type":"indexer","updated_on":1482254717}]
import sys
import requests
import time
DB_URL = 'http://localhost:9999'

# Seconds to wait between two polls of /_active_tasks.
POLL_INTERVAL_SECONDS = 1


def changes_per_second(task):
    """Return the indexing rate of an active task in changes per second.

    ``task`` is one entry of CouchDB's ``/_active_tasks`` response and must
    carry ``changes_done``, ``started_on`` and ``updated_on``.  The rate is
    the cumulative number of changes divided by the time the task has been
    running so far.

    Returns ``None`` when no time has elapsed yet (``updated_on`` is not
    later than ``started_on``), because no meaningful rate exists then.
    """
    elapsed = int(task['updated_on']) - int(task['started_on'])
    if elapsed <= 0:
        return None
    return 1.0 * int(task['changes_done']) / elapsed


def average(values):
    """Return the arithmetic mean of ``values``, or ``None`` if it is empty."""
    if not values:
        return None
    return 1.0 * sum(values) / len(values)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: {} <design_document>'.format(sys.argv[0]))
    design_doc = sys.argv[1]
    print('design doc: {}'.format(design_doc))
    print('Press Ctrl+C to exit')
    all_changes_per_sec = []
    try:
        while True:
            response = requests.get('{}/_active_tasks'.format(DB_URL))
            response.raise_for_status()
            tasks = [
                task for task in response.json()
                if task.get('design_document') == design_doc
            ]
            # Only sample when exactly one task matches the design document.
            if len(tasks) == 1:
                changes_per_sec = changes_per_second(tasks[0])
                if changes_per_sec is not None:
                    all_changes_per_sec.append(changes_per_sec)
                    print('c/s = {:.2f}'.format(changes_per_sec))
            # Sleep on every iteration, including the ones without a sample,
            # so the server being benchmarked is never polled in a busy loop.
            time.sleep(POLL_INTERVAL_SECONDS)
    except KeyboardInterrupt:
        mean = average(all_changes_per_sec)
        if mean is None:
            # Interrupted before any sample was taken: nothing to average.
            print('average = n/a (no samples collected)')
        else:
            print('average = {:.2f}'.format(mean))
# CouchDB image with Python and the `couchdb` Python package added on top.
FROM couchdb
# Install Python and pip. The apt package lists are left in the image: the
# setup commands later run `apt-get install -y python-dev` inside the running
# container without an `apt-get update` of their own, so they appear to rely
# on these lists still being present.
RUN apt-get update -yqq && apt-get install -y python python-pip
# Install the `couchdb` package from PyPI (presumably what provides the
# `couchpy` view server the setup commands point CouchDB at -- confirm).
RUN pip install couchdb
import string
import sys
import requests
import random
import datetime
from multiprocessing import Pool
# URL of the CouchDB database used by this script: it is deleted and
# re-created on startup, and every generated document is POSTed to it.
DB_URL = 'http://localhost:9999/test'
def random_name():
    """Return a random name made of 6 ASCII letters (upper or lower case)."""
    alphabet = string.ascii_letters
    letters = [random.choice(alphabet) for _ in range(6)]
    return ''.join(letters)
def random_date():
    """Return a random date from the past year as a 'YYYY-MM-DD' string.

    The date lies between 0 and 366 days (both inclusive) before today.
    """
    days_back = random.randint(0, 366)
    chosen = datetime.date.today() - datetime.timedelta(days=days_back)
    return chosen.strftime('%Y-%m-%d')
def generate_doc(i):
    """Build one random 'score' document and POST it to the database.

    ``i`` is the index of the document within the run; it is only used for
    progress reporting (a line is printed for every 5000th index).

    Raises ``requests.HTTPError`` if the server answers with an error status.
    """
    metadata = {
        'docType': 'score',
        'createdAt': random_date(),
    }
    payload = {
        'metadata': metadata,
        'username': random_name(),
        'score': random.randint(0, 5000),
    }
    requests.post(DB_URL, json=payload).raise_for_status()
    if not i % 5000:
        print('{} Done'.format(i))
if __name__ == '__main__':
    # Usage: <script> <num_of_docs>
    if len(sys.argv) < 2:
        sys.exit('usage: {} <num_of_docs>'.format(sys.argv[0]))
    num_of_docs = int(sys.argv[1])

    # Start from an empty database.  The DELETE status is deliberately not
    # checked (the database may simply not exist yet); the PUT must succeed,
    # otherwise every document POST below would fail.
    requests.delete(DB_URL)
    requests.put(DB_URL).raise_for_status()

    print('Generating {} docs'.format(num_of_docs))
    pool = Pool(50)
    try:
        pool.map(generate_doc, range(num_of_docs))
    finally:
        # map() has either finished or failed at this point, so stop the
        # workers and reap them instead of leaving them to interpreter exit.
        pool.terminate()
        pool.join()
        # Report what actually landed in the database, even after a failure.
        stats = requests.get(DB_URL).json()
        print('doc_count: {}'.format(stats['doc_count']))
        print('disk_size: {}'.format(stats['disk_size']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment