Created
February 13, 2012 13:07
-
-
Save danielrichman/1816833 to your computer and use it in GitHub Desktop.
replication is too slow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import couchdbkit | |
import gzip | |
import simplejson as json | |
def load(path="habitat.json.gz"):
    """Yield the ``"doc"`` member of every row in a gzipped JSON dump.

    The dump is read line by line so that the whole file never has to be
    held in memory.  It is expected to contain one JSON row object per
    line (presumably the output of a CouchDB ``_all_docs`` request with
    ``include_docs=true`` -- confirm against how the dump was produced).

    Args:
        path: Location of the gzip-compressed dump.  Defaults to
            ``"habitat.json.gz"`` in the current working directory.

    Yields:
        The value stored under the ``"doc"`` key of each row.

    Raises:
        IOError/OSError: if the file cannot be opened or is not valid gzip.
        ValueError: if a row line is not valid JSON.
        KeyError: if a row has no ``"doc"`` member.
    """
    # The file is closed when the generator is exhausted, closed or
    # garbage collected.
    with gzip.open(path, "rb") as dump:
        for line in dump:
            # gzip yields byte strings, so compare against byte literals;
            # this works identically on Python 2 and Python 3.
            line = line.strip()
            if not line:
                # Blank lines carry no row; skip rather than fail to parse.
                continue
            if line.endswith(b"[") or line == b"]}":
                # Opening wrapper line (ends with "[") and closing "]}".
                continue
            if line.endswith(b","):
                # Rows are separated by a trailing comma; drop it so the
                # line parses as a standalone JSON object.
                line = line[:-1]
            yield json.loads(line.decode("utf-8"))["doc"]
def no_ddocs(g):
    """Filter design documents out of a stream of documents.

    Args:
        g: Iterable of mappings, each with a string ``"_id"`` entry.

    Yields:
        Every document whose ``"_id"`` does not start with ``"_design/"``,
        in the original order.
    """
    for candidate in g:
        if candidate["_id"].startswith("_design/"):
            # Design document: leave it out of the output stream.
            continue
        yield candidate
def chunkify(g, s=1000):
    """Split an iterable into consecutive lists of at most ``s`` items.

    Args:
        g: Any iterable; it is consumed lazily, one chunk at a time.
        s: Maximum number of items per chunk (default 1000).

    Yields:
        Lists of items in their original order.  Every chunk except
        possibly the last holds exactly ``s`` items; the last holds the
        remainder.  Nothing is yielded for an empty iterable.
    """
    chunk = []
    for thing in g:
        chunk.append(thing)
        # ">=" so a chunk is emitted as soon as it holds s items; the
        # previous ">" comparison let chunks grow to s + 1 items.
        if len(chunk) >= s:
            yield chunk
            chunk = []
    if chunk:
        # Flush the final, partially filled chunk.
        yield chunk
def upload(db, g):
    """Save every document from ``g`` to ``db`` in bulk batches.

    Documents are grouped with ``chunkify`` (default chunk size) and each
    group is sent with a single ``db.bulk_save`` call.  After every batch
    the running total of documents sent so far is printed as a progress
    indicator.

    Args:
        db: Object exposing ``bulk_save(list_of_docs)`` (the script passes
            a couchdbkit database).
        g: Iterable of documents to store.
    """
    n = 0
    for chunk in chunkify(g):
        db.bulk_save(chunk)
        n += len(chunk)
        # Function-call form prints the same text on Python 2 and is
        # valid syntax on Python 3, unlike the bare print statement.
        print(n)
if __name__ == "__main__":
    # Script entry point: push every non-design document from the local
    # dump file into the "habitat" database of the CouchDB server
    # listening on localhost.
    server = couchdbkit.Server("http://localhost:5984")
    habitat_db = server["habitat"]
    documents = no_ddocs(load())
    upload(habitat_db, documents)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment