Skip to content

Instantly share code, notes, and snippets.

@pansapiens
Forked from xlfe/export.py
Last active July 13, 2024 05:34
Show Gist options
  • Save pansapiens/9533d3d28b4a59faab42cbc1c8c9b2c6 to your computer and use it in GitHub Desktop.
Save pansapiens/9533d3d28b4a59faab42cbc1c8c9b2c6 to your computer and use it in GitHub Desktop.
Export from Google App Engine Datastore Backup LevelDB format to JSON flat file
#!/usr/bin/env python2.7
# Export from Google App Engine Datastore Backup LevelDB format to JSON flat file
# Based on: https://gist.github.com/xlfe/af25f160256e4d52f499dee7e8fa212f
##
# 2024 instructions:
##
# Using the Google Cloud console (https://console.cloud.google.com), find "Firestore"
# and export your database to a Cloud Storage "Bucket". Download the content of the Bucket.
#
# You'll need Python 2.7, the (old, final) App Engine SDK and some package dependencies.
#
# wget https://storage.googleapis.com/appengine-sdks/deprecated/1921/google_appengine_1.9.21.zip
# unzip google_appengine_1.9.21.zip
#
# conda create -n old-app-engine -y python=2 pyyaml simplejson
# conda activate old-app-engine
#
# ./appengine_leveldb2json.py path/to/firestore/leveldb | gzip -c >export.json.gz
#
import sys
import os
import base64
try:
import simplejson as json
except ImportError:
import json
import datetime
sys.path.append(os.path.expanduser('./google_appengine/'))
from google.appengine.api.files import records
from google.appengine.datastore import entity_pb
from google.appengine.api import datastore
from google.appengine.api.datastore_types import Key, Blob, Text, ByteString
from google.appengine.api.users import User
from google.appengine.ext import db
# My database has these types:
#
# <class 'google.appengine.api.datastore_types.Blob'>
# <class 'google.appengine.api.datastore_types.ByteString'>
# <class 'google.appengine.api.datastore_types.Key'>
# <class 'google.appengine.api.datastore_types.Text'> # behaves like unicode
# <class 'google.appengine.api.users.User'>
# <type 'bool'>
# <type 'datetime.datetime'>
# <type 'float'>
# <type 'list'>
# <type 'long'>
# <type 'NoneType'>
# <type 'str'>
# <type 'unicode'>
def repr_key_path(k):
    """Render a key as a single '/'-separated string.

    Every item returned by ``k.to_path()`` is converted with ``str`` and the
    pieces are joined with '/'.
    """
    return '/'.join(map(str, k.to_path()))
def myconverter(o):
    """Fallback serializer handed to ``json.dumps(..., default=myconverter)``.

    The checks run in a fixed order and the first match wins:

    * ``datetime.datetime``          -> ISO-8601 string
    * ``str``                        -> base64 text
    * ``unicode`` / ``db.Text``      -> UTF-8 encoded bytes
    * ``db.Key``                     -> '/'-joined key path
    * ``db.users.User``              -> ``{'email': ..., 'id': ...}``
    * ``db.Blob`` / ``db.ByteString``-> ``str(o.ToXml())``

    Anything else is returned as ``repr(o)``.
    """
    if isinstance(o, datetime.datetime):
        return o.isoformat()
    if isinstance(o, str):
        return base64.b64encode(o)
    if isinstance(o, (unicode, db.Text)):
        return o.encode('utf-8')
    if isinstance(o, db.Key):
        return repr_key_path(o)
    if isinstance(o, db.users.User):
        # Email and user id are kept as separate fields rather than being
        # folded into one "name+id@domain" style string.
        return {'email': o.email(), 'id': o.user_id()}
    if isinstance(o, (db.Blob, db.ByteString)):
        return str(o.ToXml())
    return repr(o)
def process_entity(entity):
    """Build a plain dict from ``entity`` with values converted for JSON.

    Each ``(name, value)`` pair from ``entity.items()`` is copied into a new
    dict, converting by type (first matching check wins):

    * ``str``                         -> base64 text
    * ``unicode`` / ``db.Text``       -> UTF-8 encoded bytes
    * ``db.Key``                      -> '/'-joined key path
    * ``db.users.User``               -> stored unchanged, plus an extra
      ``<name>_hash`` entry holding ``hash(value)``
    * ``db.Blob`` / ``db.ByteString`` -> ``str(value.ToXml())``
    * anything else                   -> stored unchanged

    Returns the new dict. ``entity`` must contain a 'table' item.
    """
    out = {}
    for name, val in entity.items():
        if isinstance(val, str):
            out[name] = base64.b64encode(val)
            continue
        if isinstance(val, (unicode, db.Text)):
            out[name] = val.encode('utf-8')
            continue
        if isinstance(val, db.Key):
            out[name] = repr_key_path(val)
            continue
        if isinstance(val, db.users.User):
            # The User object itself is passed through untouched; the hash is
            # an additional identifier stored next to it.
            out[name] = val
            out[name + "_hash"] = hash(val)
            continue
        if isinstance(val, (db.Blob, db.ByteString)):
            out[name] = str(val.ToXml())
            continue
        out[name] = val
    # Written last so 'table' always carries the entity's raw value, replacing
    # whatever the loop above stored under that name.
    out['table'] = entity['table']
    return out
# Script body: walk the export directory named by the first command-line
# argument and print one JSON object per stored entity to stdout. Entities
# that cannot be serialized are reported on stderr and skipped.
if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("usage: %s path/to/leveldb/export" % sys.argv[0])

for path, dirs, files in os.walk(sys.argv[1]):
    # 'table' is the directory the record file lives in. A leading "./" is
    # dropped (and "." becomes ""), but any other path is kept whole: blindly
    # slicing off two characters would turn "path/to/export" into
    # "th/to/export".
    if path == '.' or path.startswith('./'):
        table = path[2:]
    else:
        table = path
    for fn in files:
        # Only files whose name starts with 'output' are read as record files.
        if not fn.startswith('output'):
            continue
        with open(os.path.join(path, fn), 'rb') as raw:
            for record in records.RecordsReader(raw):
                entity_proto = entity_pb.EntityProto(contents=record)
                entity = datastore.Entity.FromPb(entity_proto)
                entity['table'] = table
                entity['kind'] = unicode(entity.kind())
                # Take name/id from the LAST key path element, i.e. the entity
                # itself. The first element is the root ancestor, so using it
                # would report the ancestor's id for any nested entity.
                own_element = entity_proto.key().path().element_[-1]
                entity['id'] = unicode(own_element.name_)
                if not entity['id']:
                    # No string name on the key: fall back to the numeric id.
                    entity['id'] = own_element.id_
                processed_entity = process_entity(entity)
                try:
                    j = json.dumps(processed_entity, default=myconverter)
                except (TypeError, ValueError) as e:
                    # ValueError also covers UnicodeDecodeError, so one
                    # undecodable byte string does not abort the whole export.
                    sys.stderr.write("Error processing entity: %s\n" % str(e))
                    sys.stderr.write("Entity: %s\n" % repr(entity))
                else:
                    print(j)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment