Skip to content

Instantly share code, notes, and snippets.

@anish2690
Forked from navidshad/firestore_cursor.js
Created September 22, 2021 05:36
Show Gist options
  • Save anish2690/18e098015269a55149fa6685956f5f83 to your computer and use it in GitHub Desktop.
Save anish2690/18e098015269a55149fa6685956f5f83 to your computer and use it in GitHub Desktop.
Read all data from Firestore by a cursor and resolve the Bandwidth Exhausted error
/*
How can we read all data from a firestore collection?
HI guys I want to show you how.
I had these problems for reading all data from a firestore collection:
1. The Firestore cursor on its own is not powerful enough to get this task done.
2. After a few thousand requests you will get the Bandwidth Exhausted error.
To do this task we need two things, each of which helps us resolve one of the problems above:
1. A powerful cursor
2. A child-process manager
Why cursor?
The default Firebase cursor can't read all data automatically; it needs your interaction to tell it to go to the next part, then the next part...
So we need a cursor that tells the Firebase cursor to go to the next part automatically.
Why child-process?
After a while, when I ran into the second problem, I figured out that if we stop the running process and start a new one, we can get past the error and continue the job. So we need a child-process manager to do that for us.
For implementing these two things we need two scripts parent and child:
Parent is the child-process manager. Child is the cursor. They can send messages to each other and collaborate.
This is the scenario:
- Parent got collection detail from developer
- Then run a cursor as a child-process and provides some basic info about the collection
- Then the child goes through the collection and reads documents until it gets the [8 RESOURCE_EXHAUSTED] error, then sends a message to the parent to inform it of the error.
- Then the parent kills the child, creates a new one and tells it where to start reading again.
*/
//
// Parent Script =============================
// ===========================================
// you can call it index.js
//
const childProcess = require('child_process');
const path = require('path');
/**
 * Read every document of a collection by repeatedly forking the `cursor.js`
 * child script that sits next to this file.
 *
 * Each child streams documents back as `{ type: 'DOC', doc }` messages. When a
 * child reports `{ type: 'ERROR', lastId }` it is killed and a fresh child is
 * started after `lastId`; when it reports `{ type: 'DONE' }` reading stops.
 *
 * NOTE: a child that keeps reporting ERROR is restarted indefinitely; there is
 * no retry cap.
 *
 * @param {object} options
 * @param {string} options.collection - collection name, forwarded to the child.
 * @param {string} options.orderBy - key the child sorts all documents by.
 * @param {number} [options.limit=1000] - documents per request, forwarded to the child.
 * @param {(doc: object, index: number) => void} [options.onDoc] - called once per
 *   document with a running zero-based index.
 * @returns {Promise<void>} resolves once a child reports DONE; rejects if a
 *   child fails to start or exits without reporting DONE/ERROR, or if `onDoc`
 *   throws.
 */
async function readAllDocs({
  // collection name
  collection,
  // you should provide a key to sort all docs by it
  orderBy,
  // total documents on each request
  limit = 1000,
  // it's a callback being called per document
  onDoc,
}) {
  // id of the document the next child has to resume after (null = from the start)
  let lastId = null;
  // running index handed to onDoc
  let counter = 0;

  // Runs one child process and settles with `true` when another child is
  // needed (the child reported ERROR) or `false` when reading is finished.
  const runChild = () => {
    console.log('run cursor for', collection);
    return new Promise((resolve, reject) => {
      // Environment values are stringified, so a null lastId would reach the
      // child as the string "null". Leave the key out instead.
      const env = { ...process.env, collection, limit, orderBy };
      if (lastId == null) {
        delete env.lastId;
      } else {
        env.lastId = lastId;
      }

      const child = childProcess.fork(path.join(__dirname, 'cursor.js'), { env });

      // Settle exactly once; whatever the child does afterwards is ignored.
      let settled = false;
      const settle = (finish, value) => {
        if (settled) return;
        settled = true;
        child.kill();
        finish(value);
      };

      child.on('message', (data) => {
        // A child that is being killed may still flush messages; dropping
        // them avoids delivering documents the next child will send again.
        if (settled) return;
        const type = data && data.type;
        if (type === 'DOC') {
          try {
            if (onDoc) onDoc(data.doc, counter);
          } catch (error) {
            settle(reject, error);
            return;
          }
          counter++;
        } else if (type === 'DONE') {
          settle(resolve, false);
        } else if (type === 'ERROR') {
          // Keep the previous resume point when the child could not name one.
          if (data.lastId != null) lastId = data.lastId;
          settle(resolve, true);
        }
      });

      // Without these two listeners a crashed child would leave the promise
      // pending forever.
      child.on('error', (error) => settle(reject, error));
      child.on('exit', (code, signal) => {
        settle(
          reject,
          new Error(
            `cursor child for "${collection}" exited before reporting DONE or ERROR (code: ${code}, signal: ${signal})`,
          ),
        );
      });
    });
  };

  // start a new child process for as long as the previous one asked for it
  let shouldRestart = true;
  while (shouldRestart) {
    shouldRestart = await runChild();
  }
}
// Now you can start to read a collection.
// The returned promise is handled so a failure is logged and reflected in the
// exit code instead of surfacing as an unhandled rejection.
readAllDocs({
  collection: 'users',
  orderBy: 'uid',
  onDoc: (doc, index) => {
    // do something with the current doc
  },
}).catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
//
// Child Script ==============================
// ===========================================
// you can call it cursor.js
//
// firebase-admin SDK; every Firestore call in this script goes through it.
const admin = require("firebase-admin");
// Initialize the default app with an empty options object.
// NOTE(review): presumably credentials are picked up from the environment — confirm.
admin.initializeApp({});
/**
 * Page through a collection, ordered by `process.env.orderBy`, and hand every
 * document to `onDoc`.
 *
 * When `process.env.lastId` is set, reading resumes after that document.
 * On a read failure the parent is informed with
 * `{ type: 'ERROR', lastId, error }` and the cursor stops without calling
 * `onDone`; the parent is expected to kill this process and start a new one.
 *
 * NOTE(review): if the `lastId` document no longer exists, reading restarts
 * from the beginning of the collection — confirm this is acceptable.
 *
 * @param {object} options
 * @param {string} options.collection - collection name.
 * @param {number} [options.limit=1000] - documents per request; anything that
 *   is not a positive integer (e.g. NaN from a missing env var) falls back to 1000.
 * @param {(doc: object) => void} [options.onDoc] - called with each document snapshot.
 * @param {() => void} options.onDone - called once after the last page was read.
 * @returns {Promise<void>}
 */
async function runCursor({
  // collection name
  collection,
  // total documents on each call
  limit = 1000,
  // on document read
  onDoc,
  onDone,
}) {
  const orderBy = process.env.orderBy;
  // id the previous (killed) process stopped at, if any
  const startId = process.env.lastId || null;
  // NaN is not `undefined`, so it slips past the parameter default above
  const pageSize = Number.isInteger(limit) && limit > 0 ? limit : 1000;
  const collectionRef = admin.firestore().collection(collection);
  // last document handed to onDoc; the next page starts after it
  let lastDoc;

  // Inform the parent process of a failure and tell it where to resume.
  const reportError = (error) => {
    console.log(error);
    process.send({
      type: 'ERROR',
      // before any document was read, fall back to the id we were started with
      lastId: lastDoc ? lastDoc.id : startId,
      // the message of an Error is not enumerable and would be lost in transit
      error: error instanceof Error ? { ...error, message: error.message } : error,
    });
  };

  if (startId) {
    // Get last document from last killed process
    try {
      const snapshot = await collectionRef.doc(startId).get();
      if (snapshot.exists) lastDoc = snapshot;
    } catch (error) {
      reportError(error);
      return;
    }
  }

  for (;;) {
    let query = collectionRef.orderBy(orderBy).limit(pageSize);
    if (lastDoc) {
      // define where to start to read
      query = query.startAfter(lastDoc);
    }

    let page;
    try {
      page = await query.get();
      for (const doc of page.docs) {
        if (onDoc) onDoc(doc);
        // advance per document so a restart resumes right after the last
        // document that was actually delivered
        lastDoc = doc;
      }
    } catch (error) {
      // stop here: continuing to query would race with the parent killing
      // this process and could deliver documents twice
      reportError(error);
      return;
    }

    // stop cursor if there are no more docs
    if (page.docs.length === 0) break;
  }

  onDone();
}
// Start the cursor with the settings the parent passed through the environment.
runCursor({
  collection: process.env.collection,
  // Parse with an explicit radix; a missing or invalid value becomes
  // `undefined` so runCursor's own default applies instead of NaN.
  limit: Number.parseInt(process.env.limit, 10) || undefined,
  onDoc: (doc) => {
    process.send({ type: 'DOC', doc: doc.data() });
  },
  onDone: () => {
    process.send({ type: 'DONE' });
  },
}).catch((error) => {
  // Unexpected failure: log it and exit non-zero rather than leaving an
  // unhandled rejection behind.
  console.error(error);
  process.exit(1);
});
/*
That's it
Use these scripts to read all documents from a firestore collection
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment