-
-
Save anish2690/18e098015269a55149fa6685956f5f83 to your computer and use it in GitHub Desktop.
Read all data from Firestore by a cursor and resolve the Bandwidth Exhausted error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
How can we read all data from a Firestore collection?
Hi guys, I want to show you how.
I ran into these problems when reading all data from a Firestore collection:
1. The Firestore cursor is not powerful enough to get this task done on its own.
2. After a few thousand requests you will get a "Bandwidth Exhausted" error.
To do this task we need two things, and each one helps us resolve one of the problems above:
1. A powerful cursor
2. A child-process manager
Why a cursor?
The default Firebase cursor can't read all data automatically; it needs your interaction to tell it to go to the next part, then the next part...
So we need a cursor that tells the Firebase cursor to go to the next part automatically.
Why a child process?
After a while, when I ran into the second problem, I figured out that if we stop the running process and start a new one, we can get past the error and continue the job. So we need a child-process manager to do that for us.
To implement these two things we need two scripts, a parent and a child:
The parent is the child-process manager. The child is the cursor. They can send messages to each other and collaborate.
This is the scenario:
- The parent gets the collection details from the developer.
- Then it runs a cursor as a child process and provides some basic info about the collection.
- Then the child goes through the collection and reads documents until it gets the [8 RESOURCE_EXHAUSTED] error, then sends a message to the parent to inform it of the error.
- Then the parent kills the child, creates a new one, and tells it where to start reading again.
*/ | |
// | |
// Parent Script ============================= | |
// =========================================== | |
// you can call it index.js | |
// | |
const childProcess = require('child_process'); | |
const path = require('path'); | |
/**
 * Reads every document of a collection by running the cursor script
 * (`cursor.js`, located next to this file) in a child process, and starting
 * a fresh child each time the current one reports an error.
 *
 * Messages handled from the child:
 *   - `{ type: 'DOC', doc }`      -> `onDoc(doc, index)` is called
 *   - `{ type: 'DONE' }`          -> reading is finished
 *   - `{ type: 'ERROR', lastId }` -> the child is killed and a new one is
 *                                    started that resumes after `lastId`
 *
 * NOTE: every 'ERROR' message triggers a restart, whatever the underlying
 * error is, so an error that never goes away keeps restarting the child.
 *
 * @param {object} options
 * @param {string} options.collection - collection name
 * @param {string} options.orderBy - key used to sort all docs
 * @param {number} [options.limit=1000] - total documents on each request
 * @param {Function} [options.onDoc] - callback called per document with
 *   `(doc, index)`, where `index` counts the documents delivered so far
 * @returns {Promise<void>} resolves once a child reports 'DONE'; rejects when
 *   a child cannot be started, ends without reporting 'DONE' or 'ERROR', or
 *   when `onDoc` throws
 */
async function readAllDocs({
  // collection name
  collection,
  // you should provide a key to sort all docs by it
  orderBy,
  // total documents on each request
  limit = 1000,
  // it's a callback being called per document
  onDoc,
}) {
  // id of the last document reported by a failed child (resume point)
  let lastId = null;
  // number of documents handed to onDoc so far, across all children
  let counter = 0;

  // Runs one child process and resolves with `true` when another child has
  // to be started (the child reported an error) or `false` when the whole
  // collection has been read.
  const runChild = () => {
    console.log('run cursor for', collection);
    return new Promise((resolve, reject) => {
      const env = { ...process.env, collection, limit, orderBy };
      if (lastId != null) {
        env.lastId = lastId;
      } else {
        // An env value of `null` would reach the child as the string "null"
        // and be mistaken for a document id, so the key is left out instead.
        delete env.lastId;
      }

      const child = childProcess.fork(path.join(__dirname, 'cursor.js'), { env });

      // Settle exactly once; later events of an already handled child are ignored.
      let settled = false;
      const settle = (finish, value) => {
        if (settled) return;
        settled = true;
        child.kill();
        finish(value);
      };

      // listen to child messages
      child.on('message', (data) => {
        if (settled) return;
        if (data.type === 'DOC') {
          // When message contains a document
          try {
            if (onDoc) onDoc(data.doc, counter);
            counter++;
          } catch (error) {
            settle(reject, error);
          }
        } else if (data.type === 'DONE') {
          // When reading collection has been done
          settle(resolve, false);
        } else if (data.type === 'ERROR') {
          // When "Bandwidth Exhausted" error happened: remember where to
          // resume (keeping the previous point if the child had none yet)
          if (data.lastId != null) lastId = data.lastId;
          settle(resolve, true);
        }
      });

      // The child could not be spawned at all.
      child.on('error', (error) => settle(reject, error));

      // 'close' fires after the child has ended and its channels are closed.
      // Reaching it unsettled means the child ended without reporting
      // 'DONE' or 'ERROR'; without this the promise would never settle.
      child.on('close', (code, signal) => {
        settle(
          reject,
          new Error(
            `cursor process for "${collection}" ended before finishing (code ${code}, signal ${signal})`,
          ),
        );
      });
    });
  };

  // start new child processes until one of them reports completion
  let shouldContinue = true;
  while (shouldContinue) {
    shouldContinue = await runChild();
  }
}
// Now you can start to read a collection | |
readAllDocs({
  collection: 'users',
  orderBy: 'uid',
  onDoc: (doc, index) => {
    // do something with the current doc
  },
}).catch((error) => {
  // Report a failed read instead of leaving the rejection unhandled.
  console.error(error);
  process.exitCode = 1;
});
// | |
// Child Script ============================== | |
// =========================================== | |
// you can call it cursor.js | |
// | |
const admin = require("firebase-admin"); | |
admin.initializeApp({}); | |
/**
 * Walks through a collection page by page, ordered by the key given in the
 * `orderBy` environment variable, and hands every document to `onDoc`.
 *
 * When the `lastId` environment variable is set, the document with that id
 * is looked up first and reading starts after it (if it exists).
 *
 * On any error while reading, `{ type: 'ERROR', lastId, error }` is sent
 * through `process.send`, where `lastId` is the id of the last document that
 * was handed to `onDoc` (or the `lastId` this run started from, or `null`).
 * Reading then stops and `onDone` is NOT called.
 *
 * @param {object} options
 * @param {string} options.collection - collection name
 * @param {number} [options.limit=1000] - total documents on each call
 * @param {Function} [options.onDoc] - called with each document snapshot
 * @param {Function} [options.onDone] - called once after an empty page is
 *   returned, i.e. there are no more documents
 * @returns {Promise<void>}
 * @throws {Error} when the `orderBy` environment variable is missing
 */
async function runCursor({
  // collection name
  collection,
  // total documents on each call
  limit = 1000,
  // on document read
  onDoc,
  onDone,
}) {
  const orderBy = process.env.orderBy;
  const lastId = process.env.lastId || null;
  // last document handed to onDoc; the next page starts after it
  let lastDoc;

  // A missing sort key is a configuration mistake, not a read error, so it
  // is raised directly instead of being reported as a retryable 'ERROR'.
  if (!orderBy) {
    throw new Error('The "orderBy" environment variable is required.');
  }

  try {
    const collectionRef = admin.firestore().collection(collection);

    if (lastId) {
      // Get last document from last killed process
      const snapshot = await collectionRef.doc(lastId).get();
      if (snapshot.exists) lastDoc = snapshot;
    }

    let hasMore = true;
    while (hasMore) {
      let query = collectionRef.orderBy(orderBy).limit(limit);
      if (lastDoc) {
        // define where to start to read
        query = query.startAfter(lastDoc);
      }
      const page = await query.get();
      for (const doc of page.docs) {
        if (onDoc) onDoc(doc);
        // Advance per document so that a failure in the middle of a page
        // resumes right after the last delivered document.
        lastDoc = doc;
      }
      // stop cursor if there are no more docs
      hasMore = page.docs.length > 0;
    }
  } catch (error) {
    console.log(error);
    // Inform parent process of this error. `lastDoc` is still undefined when
    // the failure happened before any document was read, so fall back to the
    // id this run started from.
    process.send({ type: 'ERROR', lastId: lastDoc ? lastDoc.id : lastId, error });
    return;
  }

  if (onDone) onDone();
}
// Start the cursor with the settings read from the environment and forward
// every document, and the final completion notice, through process.send.
runCursor({
  collection: process.env.collection,
  // A missing or non-numeric value parses to NaN; pass undefined in that
  // case so that runCursor's default page size applies.
  limit: Number.parseInt(process.env.limit, 10) || undefined,
  onDoc: (doc) => {
    process.send({ type: 'DOC', doc: doc.data() });
  },
  onDone: () => {
    process.send({ type: 'DONE' });
  },
}).catch((error) => {
  // Anything runCursor did not report itself ends this process with a
  // failure code instead of leaving an unhandled rejection behind.
  console.error(error);
  process.exit(1);
});
/* | |
That's it | |
Use these scripts to read all documents from a firestore collection | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment