Skip to content

Instantly share code, notes, and snippets.

@danielrichman
Created February 13, 2012 13:07
Show Gist options
  • Save danielrichman/1816833 to your computer and use it in GitHub Desktop.
Save danielrichman/1816833 to your computer and use it in GitHub Desktop.
replication is too slow
import couchdbkit
import gzip
import simplejson as json
def load(path="habitat.json.gz"):
    """Stream the documents stored in a gzipped, line-oriented JSON row dump.

    The file is expected to hold one JSON row object per line, framed by a
    header line ending in ``[`` and a footer line ``]}`` (this looks like the
    output of a CouchDB ``_all_docs?include_docs=true`` request -- confirm
    against however the dump was produced).

    Args:
        path: location of the gzip file; defaults to the original
            hard-coded ``habitat.json.gz``.

    Yields:
        The value stored under the ``"doc"`` key of each row.

    Raises:
        KeyError: if a row has no ``"doc"`` member.
        ValueError: if a row line is not valid JSON.
    """
    # The context manager closes the file even when the generator is
    # abandoned part-way through.
    with gzip.open(path, "rb") as dump:
        for raw in dump:
            line = raw.strip()
            # Binary mode yields byte strings, so compare against byte
            # literals; this behaves the same on Python 2 and Python 3.
            # Blank lines, the header and the footer carry no row.
            if not line or line.endswith(b"[") or line == b"]}":
                continue
            # Every row except the last is followed by a separating comma.
            if line.endswith(b","):
                line = line[:-1]
            yield json.loads(line)["doc"]
def no_ddocs(g):
    """Lazily filter out design documents from an iterable of documents.

    Args:
        g: iterable of mappings, each with a string ``"_id"`` entry.

    Yields:
        Each document whose ``"_id"`` does not begin with ``_design/``,
        in the original order.
    """
    for candidate in g:
        if candidate["_id"].startswith("_design/"):
            # Design documents are dropped from the stream.
            continue
        yield candidate
def chunkify(g, s=1000):
    """Group the items of an iterable into lists of at most *s* items.

    Args:
        g: any iterable.
        s: maximum number of items per chunk. Values below 1 behave like 1,
            because a chunk is flushed as soon as it holds an item.

    Yields:
        Lists of consecutive items from *g*. Every list has exactly *s*
        items except possibly the last, which holds the remainder. Nothing
        is yielded for an empty iterable.
    """
    chunk = []
    for thing in g:
        chunk.append(thing)
        # ">=" rather than ">": flush once the chunk holds s items, not s + 1.
        if len(chunk) >= s:
            yield chunk
            chunk = []
    # Emit the final, partially filled chunk (if any).
    if chunk:
        yield chunk
def upload(db, g):
    """Save every document from *g* into *db* in batches, printing progress.

    Args:
        db: object exposing ``bulk_save(list_of_docs)``; the script passes
            the result of indexing a ``couchdbkit.Server``.
        g: iterable of documents to store.

    Returns:
        The total number of documents handed to ``db.bulk_save``.
    """
    n = 0
    for chunk in chunkify(g):
        db.bulk_save(chunk)
        n += len(chunk)
        # Running total after each batch. The parenthesised form prints the
        # same text under the Python 2 print statement and the Python 3
        # print function.
        print(n)
    return n
if __name__ == "__main__":
    # Script entry point: read the dump, drop design documents and push the
    # rest into the "habitat" database of the CouchDB server on localhost.
    server = couchdbkit.Server("http://localhost:5984")
    target_db = server["habitat"]
    documents = no_ddocs(load())
    upload(target_db, documents)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment