Skip to content

Instantly share code, notes, and snippets.

@abitgone
Last active August 29, 2015 14:05
Show Gist options
  • Save abitgone/bb69192b06b5334be756 to your computer and use it in GitHub Desktop.
Save abitgone/bb69192b06b5334be756 to your computer and use it in GitHub Desktop.
Quick and Dirty Daily Analytics Dump into MongoDB
/*
// Token Generation Script
//
// This will require you to visit the Google Developer Console at https://console.developers.google.com and
// create an app. You'll want to create a web application client ID and grab the client key and secret.
//
// This will then take you through the process of generating the keys in a quick and dirty fashion.
//
// Usual npm install shenanigans apply -- you'll need moment, mongodb and googleapis for this.
*/
// Configuration
var configFile;
try {
configFile = require("config.json"); // See http://s.phuu.net/12PFa6J
}
catch (err) {
configFile = {};
}
var config = {
"dbHostname": process.env.DB_HOSTNAME ||
configFile.dbHostname ||
"localhost",
"dbPort": process.env.DB_PORT ||
configFile.dbPort ||
27017,
"dbDatabase": process.env.DB_DATABASE ||
configFile.dbDatabase ||
"test",
"dbUsername": process.env.DB_USERNAME ||
configFile.dbUsername ||
"",
"dbPassword": process.env.DB_PASSWORD ||
configFile.dbPassword ||
"",
"googleClientId": process.env.GOOGLE_CLIENT_ID ||
configFile.googleClientId ||
"",
"googleClientSecret": process.env.GOOGLE_CLIENT_SECRET ||
configFile.googleClientSecret ||
"",
"googleRedirectUrl": process.env.GOOGLE_REDIRECT_URL ||
configFile.googleRedirectUrl ||
"",
"googleAnalyticsViewId": process.env.GOOGLE_ANALYTICS_VIEW_ID ||
configFile.googleAnalyticsViewId ||
-1
}
var mongo = require("mongodb"),
google = require("googleapis"),
OAuth2 = google.auth.OAuth2;
// Database
var db = new mongo.Db(
config.dbDatabase,
new mongo.Server(
config.dbHostname,
config.dbPort,
{
auto_reconnect: true
}
),
{
w: 0
}
);
// Open the database
db.open(function (openErr, openData) {
if (openData) {
openData.authenticate(
config.dbUsername,
config.dbPassword,
function (authErr, authData) {
if (authData) {
var oauth2client = new OAuth2(config.googleClientId, config.googleClientSecret, config.googleRedirectUrl);
var analytics_auth_url = oauth2client.generateAuthUrl({
access_type: "offline",
scope: [ "https://www.googleapis.com/auth/analytics.readonly" ],
approval_prompt: "force"
});
var readline = require('readline');
var rl = readline.createInterface({
input: process.stdin,
output: process.stdout
});
rl.question(["Visit ", analytics_auth_url, " and enter the code parameter: "].join(""), function (answer) {
oauth2client.getToken(answer, function (err, creds) {
if (err) {
console.log(err);
rl.close();
}
else {
var credentials = db.collection("credentials");
credentials.insert(
{
"_id": "google.analytics",
"tokens": creds,
"added": new Date()
},
function (err, insert_result) {
console.log("Credentials should be stored in the database. You can run `node update_analytics.js` now.");
rl.close();
}
);
}
});
});
}
else {
console.log("Update Analytics > db.open > authErr: " + authErr);
db.close();
}
}
);
}
else {
console.log("Update Analytics > db.open > openErr: " + openErr);
}
});
/*
// Analytics Update Script
*/
// Start
console.log("Update Analytics: Starting...");
// Configuration
var configFile;
try {
configFile = require("config.json"); // See http://s.phuu.net/12PFa6J
}
catch (err) {
configFile = {};
}
var config = {
"dbHostname": process.env.DB_HOSTNAME ||
configFile.dbHostname ||
"localhost",
"dbPort": process.env.DB_PORT ||
configFile.dbPort ||
27017,
"dbDatabase": process.env.DB_DATABASE ||
configFile.dbDatabase ||
"test",
"dbUsername": process.env.DB_USERNAME ||
configFile.dbUsername ||
"",
"dbPassword": process.env.DB_PASSWORD ||
configFile.dbPassword ||
"",
"googleClientId": process.env.GOOGLE_CLIENT_ID ||
configFile.googleClientId ||
"",
"googleClientSecret": process.env.GOOGLE_CLIENT_SECRET ||
configFile.googleClientSecret ||
"",
"googleRedirectUrl": process.env.GOOGLE_REDIRECT_URL ||
configFile.googleRedirectUrl ||
"",
"googleAnalyticsViewId": process.env.GOOGLE_ANALYTICS_VIEW_ID ||
configFile.googleAnalyticsViewId ||
-1
}
var mongo = require("mongodb"),
moment = require("moment");
// Database
var db = new mongo.Db(
config.dbDatabase,
new mongo.Server(
config.dbHostname,
config.dbPort,
{
auto_reconnect: true
}
),
{
w: 0
}
);
// Open the database
db.open(function (openErr, openData) {
if (openData) {
openData.authenticate(
config.dbUsername,
config.dbPassword,
function (authErr, authData) {
if (authData) {
// Open the credentials database so we can grab the OAuth tokens
var credentials = db.collection("credentials");
credentials.findOne({"_id": "google.analytics"}, function (err, creds) {
if (err) {
console.log("Update Analytics > credentials.findOne > err: " + err);
return;
}
// Set up the OAuth2 client
var google = require("googleapis"),
OAuth2 = google.auth.OAuth2;
var oauth2client = new OAuth2(config.googleClientId, config.googleClientSecret, config.googleRedirectUrl);
// If there's no refresh_token within the .tokens object, use the one we stored earlier
if (!creds.tokens.refresh_token) creds.tokens.refresh_token = creds.refresh_token;
// Set credentials and refresh the Access token (which only lasts an hour anyway) to prevent inevitable request replays
oauth2client.setCredentials(creds.tokens);
oauth2client.refreshAccessToken(function (err, tokens) {
// Let the log know what we're doing
console.log("Update Analytics: Updating auth tokens...");
if (err) {
// Log an error and halt the process with db.close()
console.log("Update Analytics > Failed to refresh access token: " + err);
db.close();
return;
}
// Store the update∂ access_token in the database
credentials.update(
{
"_id": "google.analytics"
},
{
"$set": {
"tokens": tokens,
"updated": new Date()
}
},
function (err, result) {
if (err) {
// Log an error and halt the process with db.close()
console.log("Update Analytics > Failed to update auth tokens: " + err);
db.close();
return;
}
console.log("Update Analytics: Updated auth tokens.");
// Get the latest entry in the metrics table for "google.analytics.visits"
var metrics = db.collection("metrics");
metrics.aggregate(
[
{ "$sort": { "date": -1 }},
{ "$limit": 1 },
{ "$project": { "_id": "$date" }}
],
function (err, latest_metric) {
if (err) {
// Log an error and halt the process with db.close()
console.log("Update Analytics > Failed to retrieve latest metric from database: " + err);
db.close();
return;
}
var latest_date = new Date(Date.UTC(2014,4-1,26)); /* Hard coded: April 1st, 2014. You might want to choose a more significant date. */
if (latest_metric.length == 1) latest_date = new Date(latest_metric[0]._id);
// Always start with the day BEFORE the latest date, so that we constantly update the previous day.
latest_date.setUTCDate(latest_date.getUTCDate() - 1);
var dates_to_process = [];
while (latest_date < new Date()) {
dates_to_process.push(new Date(latest_date));
latest_date.setUTCDate(latest_date.getUTCDate() + 1);
}
// Keep an eye on the number of dates we have left to receive data for
var remaining_dates = dates_to_process.length;
// Start an interval timer that kicks an update off every 2/10ths of a second (so we don't hit Google Analytics' rate limit of 10rps)
var date_process_timer = setInterval(function () {
// Check to make sure we've still got dates to process
if (dates_to_process.length > 0) {
// Get the first item from the array (shift() is the same as pop(), but takes the first item as opposed to the last)
var date_to_process = dates_to_process.shift();
// Talk to Google Analytics
var analytics = google.analytics("v3");
analytics.data.ga.get(
{
"auth": oauth2client,
"ids": ["ga:", config.googleAnalyticsViewId].join(""),
"start-date": moment(date_to_process).format("YYYY-MM-DD"),
"end-date": moment(date_to_process).format("YYYY-MM-DD"),
"metrics": "ga:visits"
},
function (err, ga_data) {
// Regardless of whether it fails or not, we're done with this item, so decrement the counter
remaining_dates--;
if (err) {
console.log(["Update Analytics > Failed to get analytics data for ", moment(date_to_process).format("YYYY-MM-DD"), ": ", err].join(""));
}
else {
// Upsert the metric in the metrics collection
metrics.update(
{ "_id": ["google.analytics.views.", moment(date_to_process).format("YYYY-MM-DD")].join("") },
{
"$set": {
"date": date_to_process,
"metric": "views",
"value": parseInt(ga_data.totalsForAllResults["ga:visits"])
}
},
{ "upsert": true },
function (err, metric_data) {
if (err) {
console.log(["Update Analytics > Failed to store analytics data for ", moment(date_to_process).format("YYYY-MM-DD"), ": ", err].join(""));
}
else {
console.log(["Update Analytics: Stored analytics data for ", moment(date_to_process).format("YYYY-MM-DD"), "; ", ga_data.totalsForAllResults["ga:visits"], " views."].join(""));
}
}
);
}
}
);
}
else {
// We've emptied the dates_to_process array - stop the interval
clearInterval(date_process_timer);
// Start a new interval to wait for the db updates to finish
var wait_timer = setInterval(function () {
if (remaining_dates == 0) {
console.log("Update Analytics: Done.")
clearInterval(wait_timer);
db.close();
}
}, 200); // Wait interval timer fires every 0.2s
}
}, 200); // Date processing interval timer fires every 0.2s
}
)
}
);
});
});
}
else {
console.log("Update Analytics > db.open > authErr: " + authErr);
db.close();
}
}
);
}
else {
console.log("Update Analytics > db.open > openErr: " + openErr);
}
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment