Created
September 26, 2022 21:17
-
-
Save harazdovskiy/63bdddb945e22528d73639b48f61659b to your computer and use it in GitHub Desktop.
Example of reading and inserting 7 million records into mongo - https://github.com/harazdovskiy/mongo-performance/blob/master/insert/7m/7m-m30-per100k.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const dotenv = require('dotenv'); | |
dotenv.config() | |
const fs = require('fs'); | |
const objectSize = require('object-sizeof'); | |
const radash = require('radash'); | |
const JsonlParser = require('stream-json/jsonl/Parser'); | |
const {MongoClient, ServerApiVersion} = require("mongodb"); | |
// Path of the line-delimited JSON dataset that gets streamed from disk.
const PATH_7MIL = '../dataset/7m-yelp-reviews.json';

// Target MongoDB database and collection for the inserted documents.
const DB_NAME = 'performance7m';
const COLLECTION_NAME = '7mil-collection';

// Multiplier applied to a byte count to express it in megabytes
// (roughly 1 / 1,048,576).
const BYTE_IN_MB = 0.00000095367432;

// Streaming JSONL parser; the file read stream is piped into it.
const parser = new JsonlParser();
/**
 * Entry point: streams the JSONL dataset at PATH_7MIL through `parser` and
 * inserts the parsed records into MongoDB in batches of 100,000, logging the
 * approximate in-memory size and insertion time of every batch.
 *
 * Iterating the stream with `for await` only pulls the next record after the
 * loop body (including the awaited insert) has finished, so no manual
 * pause()/resume() bookkeeping is required.
 *
 * Any records left over after the last full batch are inserted once the
 * stream ends, so the tail of the dataset is not silently dropped.
 *
 * Exits the process with code 0 on success and 1 on any failure.
 */
(async () => {
    const BATCH_SIZE = 100_000;
    const started = Date.now();
    let arrayToInsert = [];
    let objectCounter = 0;

    try {
        const col = await getCollection();

        // Inserts the currently buffered records and resets the buffer.
        const flushBatch = async () => {
            const timerLabel = `Inserting time - ${objectCounter}`;
            console.log('arrayToInsert size -', objectSize(arrayToInsert) * BYTE_IN_MB, 'mb');
            console.time(timerLabel);
            await col.insertMany(arrayToInsert);
            console.timeEnd(timerLabel);
            arrayToInsert = [];
            console.log('--------------\n');
        };

        const source = fs.createReadStream(PATH_7MIL);
        const pipeline = source.pipe(parser);
        // pipe() does not forward source errors; destroy the parser so the
        // `for await` loop below rejects instead of waiting forever.
        source.on('error', (err) => pipeline.destroy(err));

        for await (const data of pipeline) {
            objectCounter++;
            arrayToInsert.push(data.value);
            if (objectCounter % BATCH_SIZE === 0) {
                await flushBatch();
                // Short breather between batches, as in the original benchmark.
                await radash.sleep(100);
            }
        }

        // Insert the final partial batch (fewer than BATCH_SIZE records).
        if (arrayToInsert.length > 0) {
            await flushBatch();
        }

        console.log('Operation took - ', (Date.now() - started) * 0.001, ' seconds\n');
        process.exit();
    } catch (err) {
        console.error('Import failed after', objectCounter, 'records:', err);
        process.exit(1);
    }
})();
/**
 * Connects to the MongoDB cluster whose connection string is stored in the
 * MONGO_CLUSTER_M30 environment variable and returns the target collection.
 *
 * @returns {Promise<object>} The COLLECTION_NAME collection of the DB_NAME database.
 * @throws {Error} If MONGO_CLUSTER_M30 is unset or empty, or if connecting fails.
 */
async function getCollection() {
    const uri = process.env.MONGO_CLUSTER_M30;
    // Fail early with a clear message instead of handing an undefined
    // connection string to the driver.
    if (!uri) {
        throw new Error('Environment variable MONGO_CLUSTER_M30 is not set');
    }

    const client = new MongoClient(uri, {
        useNewUrlParser: true,
        useUnifiedTopology: true,
        serverApi: ServerApiVersion.v1,
    });
    await client.connect();
    console.log('Connected successfully to server');

    const db = client.db(DB_NAME);
    return db.collection(COLLECTION_NAME);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment