# DrDanL/python-firestore.py

## python-firestore.py
import pandas as pd
import firebase_admin
from firebase_admin import credentials, firestore

# root folder path (not referenced by the code visible in this file)
base_url = '<BASE URL HERE>'

# page size used when downloading data, i.e. at most 1000 documents are requested per call
limit = 1000

# create the connection only if the default app does not exist yet;
# get_app() raises ValueError when no default app has been initialised
try:
    firebase_admin.get_app()
except ValueError:
    print('Setting connection')

    # use the service key to authorise the login
    cred = credentials.Certificate('ServiceAccountKey.json')

    # ensure we have the storage bucket permission.
    # for some reason Firebase wants this even if you don't want to pull the data
    # (the bucket name below is a placeholder to be replaced)
    default_app = firebase_admin.initialize_app(cred, {
        'storageBucket': '<STORAGE BUCKET'
    })

# start the firestore client
# this sits outside the initialisation branch so that `store` is always defined,
# even when the app had already been initialised before this code ran
store = firestore.client()

# declare a function to stream the documents to download
def stream_collection_loop(collection, count, cursor=None, page_size=None):
    """Download every document of a collection, one page at a time.

    The query is ordered by document name ('__name__') and fetched in pages.
    Each following page starts after the last snapshot of the previous page,
    and paging stops at the first page that is not completely full.

    Args:
        collection: collection reference or query to read from. It must
            provide ``limit``, ``order_by``, ``start_after`` and ``stream``.
        count: not used by this function; kept so that existing callers
            which pass it keep working.
        cursor: optional document snapshot to resume after. A falsy value
            (the default ``None``) starts from the first document.
        page_size: maximum number of documents requested per call. Defaults
            to the module-level ``limit``.

    Returns:
        A ``(dict_array, id_array)`` tuple of two parallel lists: the
        ``to_dict()`` data of every document and the matching document ids.

    Raises:
        ValueError: if the page size is smaller than 1.
    """
    if page_size is None:
        page_size = limit
    if page_size < 1:
        # a page size of 0 could never produce a "last document" to page from
        raise ValueError('page_size must be at least 1')

    # the limit and ordering are the same for every page, so build them once
    page_query = collection.limit(page_size).order_by('__name__')

    dict_array = []
    id_array = []

    while True:
        query = page_query.start_after(cursor) if cursor else page_query
        docs = list(query.stream())

        for doc in docs:
            dict_array.append(doc.to_dict())
            id_array.append(doc.id)

        # a page that is not full means there is nothing left to download
        if len(docs) != page_size:
            break

        # resume the next page after the last document of this one
        cursor = docs[-1]

    return dict_array, id_array

# example: download every document of the 'users' collection
# (the second argument is the function's `count` parameter, passed as 0 here)
users_collection = store.collection(u'users')
dict_array, id_array = stream_collection_loop(users_collection, 0)
# dict_array now holds the data of each Firestore document,
# id_array holds the matching document ids (often known as doc_id)

# from here on the data can be managed and processed with pandas:
# build a DataFrame from the document data, indexed by document id, and
# name the index "doc_id" so the ids can always be referenced by that name
df = pd.DataFrame(data=dict_array, index=id_array).rename_axis("doc_id")

# the data has been downloaded; preview the first five rows
# NOTE(review): `display` is not imported in this file - presumably the
# IPython/Jupyter builtin; confirm before running as a plain script
display(df.head(5))
	import pandas as pd
	import firebase_admin
	from firebase_admin import credentials, firestore

	# root folder path (not referenced by the code visible in this file)
	base_url = '<BASE URL HERE>'

	# page size used when downloading data, i.e. at most 1000 documents are requested per call
	limit = 1000

	# create the connection only if the default app does not exist yet;
	# get_app() raises ValueError when no default app has been initialised
	try:
		firebase_admin.get_app()
	except ValueError:
		print('Setting connection')

		# use the service key to authorise the login
		cred = credentials.Certificate('ServiceAccountKey.json')

		# ensure we have the storage bucket permission.
		# for some reason Firebase wants this even if you don't want to pull the data
		# (the bucket name below is a placeholder to be replaced)
		default_app = firebase_admin.initialize_app(cred, {
			'storageBucket': '<STORAGE BUCKET'
		})

	# start the firestore client
	# this sits outside the initialisation branch so that `store` is always defined,
	# even when the app had already been initialised before this code ran
	store = firestore.client()

	# declare a function to stream the documents to download
	def stream_collection_loop(collection, count, cursor=None, page_size=None):
		"""Download every document of a collection, one page at a time.

		The query is ordered by document name ('__name__') and fetched in pages.
		Each following page starts after the last snapshot of the previous page,
		and paging stops at the first page that is not completely full.

		Args:
			collection: collection reference or query to read from. It must
				provide ``limit``, ``order_by``, ``start_after`` and ``stream``.
			count: not used by this function; kept so that existing callers
				which pass it keep working.
			cursor: optional document snapshot to resume after. A falsy value
				(the default ``None``) starts from the first document.
			page_size: maximum number of documents requested per call. Defaults
				to the module-level ``limit``.

		Returns:
			A ``(dict_array, id_array)`` tuple of two parallel lists: the
			``to_dict()`` data of every document and the matching document ids.

		Raises:
			ValueError: if the page size is smaller than 1.
		"""
		if page_size is None:
			page_size = limit
		if page_size < 1:
			# a page size of 0 could never produce a "last document" to page from
			raise ValueError('page_size must be at least 1')

		# the limit and ordering are the same for every page, so build them once
		page_query = collection.limit(page_size).order_by('__name__')

		dict_array = []
		id_array = []

		while True:
			query = page_query.start_after(cursor) if cursor else page_query
			docs = list(query.stream())

			for doc in docs:
				dict_array.append(doc.to_dict())
				id_array.append(doc.id)

			# a page that is not full means there is nothing left to download
			if len(docs) != page_size:
				break

			# resume the next page after the last document of this one
			cursor = docs[-1]

		return dict_array, id_array

	# example: download every document of the 'users' collection
	# (the second argument is the function's `count` parameter, passed as 0 here)
	users_collection = store.collection(u'users')
	dict_array, id_array = stream_collection_loop(users_collection, 0)
	# dict_array now holds the data of each Firestore document,
	# id_array holds the matching document ids (often known as doc_id)

	# from here on the data can be managed and processed with pandas:
	# build a DataFrame from the document data, indexed by document id, and
	# name the index "doc_id" so the ids can always be referenced by that name
	df = pd.DataFrame(data=dict_array, index=id_array).rename_axis("doc_id")

	# the data has been downloaded; preview the first five rows
	# NOTE(review): `display` is not imported in this file - presumably the
	# IPython/Jupyter builtin; confirm before running as a plain script
	display(df.head(5))