Skip to content

Instantly share code, notes, and snippets.

@harazdovskiy
Created September 26, 2022 21:17
Show Gist options
  • Save harazdovskiy/63bdddb945e22528d73639b48f61659b to your computer and use it in GitHub Desktop.
Save harazdovskiy/63bdddb945e22528d73639b48f61659b to your computer and use it in GitHub Desktop.
Example of reading and inserting 7 million records into mongo - https://github.com/harazdovskiy/mongo-performance/blob/master/insert/7m/7m-m30-per100k.js
const dotenv = require('dotenv');
dotenv.config()
const fs = require('fs');
const objectSize = require('object-sizeof');
const radash = require('radash');
const JsonlParser = require('stream-json/jsonl/Parser');
const {MongoClient, ServerApiVersion} = require("mongodb");
// Single JSONL parser instance (stream-json); the dataset read stream is piped
// into it by the main routine below, which reads each record from `data.value`.
const parser = new JsonlParser();
// Path of the JSONL dataset that the main routine streams from disk.
const PATH_7MIL = '../dataset/7m-yelp-reviews.json';
// Target MongoDB collection and database names.
const COLLECTION_NAME = '7mil-collection';
const DB_NAME = 'performance7m';
// Multiplier converting a byte count to mebibytes (1 / 1024^2). Written as an
// exact expression instead of a truncated decimal literal.
const BYTE_IN_MB = 1 / (1024 * 1024);
/**
 * Entry point: streams the JSONL dataset from disk and bulk-inserts the parsed
 * records into MongoDB in fixed-size batches, logging the approximate size and
 * the insert time of every batch, then the total elapsed time.
 *
 * Differences from a plain 'data'/'end' event implementation:
 *  - the stream is consumed with `for await`, so reading is suspended while a
 *    batch is being inserted without manual pause()/resume() calls;
 *  - records left over after the last full batch are inserted too instead of
 *    being dropped when the stream ends;
 *  - any failure (connection, parsing, insert, file read) is reported and the
 *    process exits with a non-zero code instead of an unhandled rejection.
 */
(async () => {
  // Number of parsed records accumulated before each insertMany call.
  const BATCH_SIZE = 100_000;

  const started = Date.now();
  let arrayToInsert = [];
  let objectCounter = 0;

  const col = await getCollection();

  const source = fs.createReadStream(PATH_7MIL);
  const pipeline = source.pipe(parser);
  // pipe() does not forward errors, so surface read failures through the
  // parser stream where the for-await loop below will observe them.
  source.on('error', (err) => pipeline.destroy(err));

  // Inserts the accumulated batch, logs its size and timing, and starts a new one.
  const flushBatch = async () => {
    console.log('arrayToInsert size -', objectSize(arrayToInsert) * BYTE_IN_MB, 'mb');
    const label = `Inserting time - ${objectCounter}`;
    console.time(label);
    await col.insertMany(arrayToInsert);
    console.timeEnd(label);
    arrayToInsert = [];
    console.log('--------------\n');
  };

  for await (const data of pipeline) {
    objectCounter++;
    arrayToInsert.push(data.value);
    if (arrayToInsert.length >= BATCH_SIZE) {
      await flushBatch();
      // Short pause between batches, as in the original benchmark.
      await radash.sleep(100);
    }
  }

  // Insert the final partial batch; skipped when empty because insertMany
  // is not called with an empty array.
  if (arrayToInsert.length > 0) {
    await flushBatch();
  }

  console.log('Operation took - ', (Date.now() - started) * 0.001, ' seconds\n');
  // The MongoClient is not exposed by getCollection(), so exit explicitly to
  // release the open connection.
  process.exit();
})().catch((err) => {
  console.error('Import failed:', err);
  process.exit(1);
});
/**
 * Connects to the MongoDB deployment whose connection string is stored in the
 * MONGO_CLUSTER_M30 environment variable and resolves with the collection
 * COLLECTION_NAME of database DB_NAME.
 *
 * @returns {Promise<object>} the collection handle obtained from the connected client
 */
async function getCollection() {
  const uri = process.env.MONGO_CLUSTER_M30;
  const options = {
    useNewUrlParser: true,
    useUnifiedTopology: true,
    serverApi: ServerApiVersion.v1,
  };

  const mongo = new MongoClient(uri, options);
  await mongo.connect();
  console.log('Connected successfully to server');

  return mongo.db(DB_NAME).collection(COLLECTION_NAME);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment