Skip to content

Instantly share code, notes, and snippets.

@varmil
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save varmil/ada7471eba02797130fb to your computer and use it in GitHub Desktop.
Save varmil/ada7471eba02797130fb to your computer and use it in GitHub Desktop.
200万件のCSVをparseしつつMongoDBに素早く突っ込むにはBulk()を使う(Nodeでやってみた) ref: http://qiita.com/varmil/items/be53a562393dcfb8867e
var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;
var CSV = require('comma-separated-values');
// Target database / collection names and the connection URL built from them.
var NAME_MONGO_DB = 'eijiro';
var NAME_MONGO_COLLECTION = 'words';
var url = 'mongodb://localhost:27017/' + NAME_MONGO_DB;

// Path of the CSV file to import, taken from the first command-line argument.
var inputCsv = process.argv[2];
if (!inputCsv) {
  // Without a path, fs.readFileSync would throw on the undefined argument;
  // print a usage hint and stop with a non-zero status instead.
  console.error('Usage: node ' + process.argv[1] + ' <input.csv>');
  process.exit(1);
}

// Read the entire file into memory as a UTF-8 string before connecting.
var text = fs.readFileSync(inputCsv, 'utf-8');
/**
 * Insert a batch of documents into the NAME_MONGO_COLLECTION collection.
 *
 * @param {Object} db - Database handle exposing `collection(name)`.
 * @param {Array<Object>} data - Documents to insert, e.g. [ {a : 1}, {a : 2}, {a : 3} ].
 * @param {Function} callback - Invoked exactly once when the insert finishes:
 *   `callback(result)` on success, `callback(undefined, err)` on failure.
 */
var insertDocuments = function(db, data, callback) {
  // Get the documents collection
  var collection = db.collection(NAME_MONGO_COLLECTION);

  // Insert all documents with a single call
  collection.insert(data, function(err, result) {
    if (err) {
      console.error(err);
      // Still notify the caller: it owns the connection, and if this callback
      // never fired it would have no opportunity to close it.
      callback(undefined, err);
      return;
    }
    callback(result);
  });
};
// Entry point: open the connection, parse the CSV text that was read earlier,
// pass the parsed rows to insertDocuments, and close the connection once
// insertDocuments reports completion.
MongoClient.connect(url, function onConnected(connectError, database) {
  if (connectError) {
    console.error(connectError);
    return;
  }

  var parseOptions = { header: true, cast: false };
  var parser = new CSV(text, parseOptions);
  var documents = parser.parse();

  insertDocuments(database, documents, function closeConnection(insertResult) {
    database.close();
  });
});
# コマンド例
node good.js eijiro_dic_utf8.csv
var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;
var CSV = require('comma-separated-values');
// Target database / collection names and the connection URL built from them.
var NAME_MONGO_DB = 'eijiro';
var NAME_MONGO_COLLECTION = 'words';
var url = 'mongodb://localhost:27017/' + NAME_MONGO_DB;

// Path of the CSV file to import, taken from the first command-line argument.
var inputCsv = process.argv[2];
if (!inputCsv) {
  // Without a path, fs.readFileSync would throw on the undefined argument;
  // print a usage hint and stop with a non-zero status instead.
  console.error('Usage: node ' + process.argv[1] + ' <input.csv>');
  process.exit(1);
}

// Read the entire file into memory as a UTF-8 string before connecting.
var text = fs.readFileSync(inputCsv, 'utf-8');
// Entry point: connect, queue every CSV row on an unordered bulk operation,
// run the bulk operation with one execute() call, then close the connection
// and end the process.
MongoClient.connect(url, function(err, db) {
  if (err) return console.error(err);

  var col = db.collection(NAME_MONGO_COLLECTION);
  var batch = col.initializeUnorderedBulkOp();

  // Queue one insert per row produced by the CSV parser.
  new CSV(text, { header: true, cast: false }).forEach(function(obj) {
    batch.insert(obj);
  });

  batch.execute(function(err, result) {
    db.close();
    // Log through console.error: `console.err` does not exist, and calling it
    // would throw a TypeError that hides the actual bulk-write failure.
    if (err) console.error(err);
    // Surface a failed import to the shell through the exit status.
    process.exit(err ? 1 : 0);
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment