-
-
Save pansapiens/9533d3d28b4a59faab42cbc1c8c9b2c6 to your computer and use it in GitHub Desktop.
Export from Google App Engine Datastore Backup LevelDB format to JSON flat file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7
# Export from Google App Engine Datastore Backup LevelDB format to JSON flat file
# Based on: https://gist.github.com/xlfe/af25f160256e4d52f499dee7e8fa212f
##
# 2024 instructions:
##
# Using the Google Cloud console (https://console.cloud.google.com), find "Firestore"
# and export your database to a Cloud Storage "Bucket". Download the contents of the Bucket.
#
# You'll need Python 2.7, the (old, final) App Engine SDK and some package dependencies.
# Unzip the SDK in the directory you run this script from: the script adds
# ./google_appengine/ (relative to the current working directory) to sys.path.
#
# wget https://storage.googleapis.com/appengine-sdks/deprecated/1921/google_appengine_1.9.21.zip
# unzip google_appengine_1.9.21.zip
#
# conda create -n old-app-engine -y python=2 pyyaml simplejson
# conda activate old-app-engine
#
# ./appengine_leveldb2json.py path/to/firestore/leveldb | gzip -c >export.json.gz
#
import sys | |
import os | |
import base64 | |
try: | |
import simplejson as json | |
except ImportError: | |
import json | |
import datetime | |
sys.path.append(os.path.expanduser('./google_appengine/')) | |
from google.appengine.api.files import records | |
from google.appengine.datastore import entity_pb | |
from google.appengine.api import datastore | |
from google.appengine.api.datastore_types import Key, Blob, Text, ByteString | |
from google.appengine.api.users import User | |
from google.appengine.ext import db | |
# The database this script was written for contains values of these types:
#
# <class 'google.appengine.api.datastore_types.Blob'>
# <class 'google.appengine.api.datastore_types.ByteString'>
# <class 'google.appengine.api.datastore_types.Key'>
# <class 'google.appengine.api.datastore_types.Text'> # behaves like unicode
# <class 'google.appengine.api.users.User'>
# <type 'bool'>
# <type 'datetime.datetime'>
# <type 'float'>
# <type 'list'>
# <type 'long'>
# <type 'NoneType'>
# <type 'str'>
# <type 'unicode'>
def repr_key_path(k):
    """Render a key as a single slash-separated string.

    Each element returned by ``k.to_path()`` is converted with ``str`` and
    the pieces are joined with ``/``.
    """
    return '/'.join(map(str, k.to_path()))
def myconverter(o):
    """Fallback serializer passed to ``json.dumps`` through ``default=``.

    Converts a value that the JSON encoder cannot handle on its own:

    * ``datetime.datetime`` -> ISO 8601 string (``isoformat()``)
    * ``str``               -> base64-encoded string
    * ``unicode``/``Text``  -> UTF-8 encoded byte string
    * ``Key``               -> slash-separated key path
    * ``User``              -> ``{'email': ..., 'id': ...}``
    * ``Blob``/``ByteString`` -> ``str(o.ToXml())``

    Anything else falls back to ``repr(o)``.
    """
    # The checks run in a fixed order and the first match wins.
    # NOTE(review): if Text subclasses unicode and Blob/ByteString subclass
    # str (as the SDK presumably defines them), those later branches are
    # shadowed by the plain str/unicode checks above them -- confirm.
    if isinstance(o, datetime.datetime):
        return o.isoformat()
    if isinstance(o, str):
        return base64.b64encode(o)
    if isinstance(o, (unicode, db.Text)):
        return o.encode('utf-8')
    if isinstance(o, db.Key):
        return repr_key_path(o)
    if isinstance(o, db.users.User):
        # Email and user id are kept as separate fields rather than being
        # folded into one "user+id@domain" style string.
        return {'email': o.email(), 'id': o.user_id()}
    if isinstance(o, (db.Blob, db.ByteString)):
        return str(o.ToXml())
    return repr(o)
def process_entity(entity):
    """Build a plain dict of JSON-friendly values from a datastore entity.

    Every ``(key, value)`` pair from ``entity.items()`` is copied into a new
    dict, converting by value type (first match wins):

    * ``str``               -> base64-encoded string
    * ``unicode``/``Text``  -> UTF-8 encoded byte string
    * ``Key``               -> slash-separated key path
    * ``User``              -> kept as-is, plus an extra ``<key>_hash`` entry
                               holding ``hash(value)``
    * ``Blob``/``ByteString`` -> ``str(value.ToXml())``
    * anything else         -> kept as-is

    Finally ``'table'`` is set to the untouched ``entity['table']``.

    Returns the new dict; raises ``KeyError`` if ``entity`` has no
    ``'table'`` entry.
    """
    processed = {}
    for name, raw in entity.items():
        if isinstance(raw, str):
            processed[name] = base64.b64encode(raw)
            continue
        if isinstance(raw, (unicode, db.Text)):
            processed[name] = raw.encode('utf-8')
            continue
        if isinstance(raw, db.Key):
            processed[name] = repr_key_path(raw)
            continue
        if isinstance(raw, db.users.User):
            # The User object itself is passed through unchanged; the hash
            # is stored alongside it as an alternative identifier.
            processed[name] = raw
            processed[name + "_hash"] = hash(raw)
            continue
        if isinstance(raw, (db.Blob, db.ByteString)):
            processed[name] = str(raw.ToXml())
            continue
        processed[name] = raw
    # Overwrite whatever the loop stored under 'table' with the original
    # value, so the table name is emitted unconverted.
    processed['table'] = entity['table']
    return processed
# Walk the export directory given as the first command-line argument and
# print one JSON document per datastore entity to stdout. Only files whose
# name starts with "output" are read. Entities that cannot be serialized are
# reported on stderr and skipped.
for path, dirs, files in os.walk(sys.argv[1]):
    # The table name is the directory path without a leading "./".
    # Strip that prefix only when it is really there: slicing path[2:]
    # unconditionally cut two real characters off roots such as
    # "path/to/leveldb" or "/abs/path".
    curdir_prefix = os.curdir + os.sep
    if path == os.curdir:
        table = ''
    elif path.startswith(curdir_prefix):
        table = path[len(curdir_prefix):]
    else:
        table = path

    for fn in files:
        if not fn.startswith('output'):
            continue
        with open(os.path.join(path, fn), 'rb') as raw:
            for record in records.RecordsReader(raw):
                entity_proto = entity_pb.EntityProto(contents=record)
                entity = datastore.Entity.FromPb(entity_proto)
                entity['table'] = table
                entity['kind'] = unicode(entity.kind())

                # Identify the entity by the LAST element of its key path.
                # The first element belongs to the root ancestor, so nested
                # entities would otherwise be tagged with their ancestor's id.
                # NOTE(review): this assumes entity.kind() above also refers
                # to the last path element -- confirm against the SDK.
                key_element = entity_proto.key().path().element_[-1]
                # Decode explicitly as UTF-8; unicode(name_) uses the ASCII
                # codec and fails on non-ASCII key names.
                entity_id = key_element.name_.decode('utf-8')
                if not entity_id:
                    # No key name: fall back to the numeric id.
                    entity_id = key_element.id_
                entity['id'] = entity_id

                processed_entity = process_entity(entity)
                try:
                    j = json.dumps(processed_entity, default=myconverter)
                except (TypeError, ValueError) as e:
                    # ValueError also covers UnicodeDecodeError, which
                    # json.dumps raises for byte strings that are not valid
                    # UTF-8 (e.g. inside list values). Report and continue
                    # instead of aborting the whole export.
                    sys.stderr.write("Error processing entity: %s\n" % (str(e),))
                    sys.stderr.write("Entity: %s\n" % (repr(entity),))
                else:
                    print(j)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment