Skip to content

Instantly share code, notes, and snippets.

@tforster
Last active September 8, 2018 14:51
Show Gist options
  • Save tforster/4718813 to your computer and use it in GitHub Desktop.
Save tforster/4718813 to your computer and use it in GitHub Desktop.
[Tumblr to Mongo] Node.js script that fetches all posts from multiple Tumblr blogs into a single MongoDB collection. It saves the sorted posts to disk as a .json file and also bulk-inserts them into a MongoDB collection. #tumblr #mongo
/*
 * Settings: adjust everything in this section to match your environment.
 */

// Tumblr side: the blogs to pull posts from and the API key sent with each request.
var blogs = ["x.tumblr.com", "y.tumblr.com", "z.tumblr.com"];
var apiKey = "**************************************************";

// MongoDB side: where the merged posts are inserted.
var databaseHost = "localhost";
var databasePort = 27017;
var databaseName = "node-blog";
var databaseCollection = "posts";

// Shared run state read and updated by CallAndLoad / InsertIntoMongo.
var Posts = [];        // every post fetched so far, across all blogs
var currentBlog = 0;   // index into `blogs` of the blog being fetched
var start = new Date(); // used for the elapsed-time log messages

// Start with the first blog at offset 0; CallAndLoad chains the remaining requests itself.
CallAndLoad(0, blogs[0]);
function CallAndLoad(offset, blog) {
var fs = require("fs");
var http = require("http");
var options = {
host: "api.tumblr.com",
path: "/v2/blog/" + blog + "/posts/?api_key=" + apiKey + "&offset=" + offset,
method: "GET",
headers: { "Content-Type": "application/json" }
}
var req = http.request(options, function (response) {
var data = "";
response.setEncoding("utf8");
response.on("data", function (chunk) {
data += chunk;
});
response.on("end", function () {
var json = JSON.parse(data);
if (json.response.posts.length == 0) {
// Fetch the next blog
if (currentBlog < blogs.length - 1) {
currentBlog++;
CallAndLoad(0, blogs[currentBlog]);
}
else {
// We've got all the blogs and posts, time to sort by date. This was done when I was saving to disk only and had not added MongoDB support
function compare(a, b) {
if (a.date > b.date)
return -1;
if (a.date < b.date)
return 1;
return 0;
}
Posts.sort(compare);
fs.writeFileSync("posts.json", JSON.stringify(Posts), "utf8");
console.log("Imported, merged, sorted and wrote to disk " + Posts.length + " posts from " + blogs.length + " blogs in " + ((new Date() - start) / 1000).toString() + " seconds.");
var retval = InsertIntoMongo(Posts);
}
}
else {
// Get the individual posts from the response object and stuff into our global Posts array
for (var j = 0; j < json.response.posts.length; j++) {
Posts.push(json.response.posts[j]);
}
offset += json.response.posts.length;
// Fetch more posts
CallAndLoad(offset, blog);
}
});
});
req.on("error", function (e) {
console.log("Got error: " + e.message);
});
req.end();
}
function InsertIntoMongo(posts) {
var mongodb = require("mongodb").Db;
var Server = require('mongodb').Server;
var db = new mongodb(databaseName, new Server(databaseHost, databasePort, { auto_reconnect: false, poolSize: 4 }), { w: 0, native_parser: false });
db.open(function (err, db) {
if (err) {
console.log(err.message);
}
db.collection(databaseCollection, function (err, collection) {
collection.insert(posts, { w: 1 }, function (err, result) { });
});
db.close();
console.log("Created database, collection and imported " + posts.length + " posts in " + ((new Date() - start) / 1000).toString() + " seconds.");
});
return true;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment