@cadecairos
Last active June 23, 2016 14:53

-- create_tables.sql
BEGIN;

CREATE TABLE users
(
  id bigint PRIMARY KEY,
  username varchar UNIQUE,
  email varchar UNIQUE
);

CREATE TABLE projects
(
  html text,
  email varchar,
  title varchar,
  "createdAt" date,
  "updatedAt" date
);

COMMIT;

// import-thimble-projects.js
// Imports old thimble projects into a publish database
//
// execute as:
// node scripts/import-thimble-projects.js <legacyProjectConnString> <publishConnString> <limit> <offset>

'use strict';

const knex = require('knex');
const Hoek = require('hoek');

const legacyProjectConnString = process.argv[2];
const publishConnString = process.argv[3];
const limit = process.argv[4] || 50000;
const offset = process.argv[5] || 0;

Hoek.assert(legacyProjectConnString, 'You must provide a connection string to a database containing legacy project data');
Hoek.assert(publishConnString, 'You must provide a connection string to a publish database');

const legacyProjectClient = knex({
  client: 'pg',
  connection: legacyProjectConnString
});

const publishClient = knex({
  client: 'pg',
  connection: publishConnString
});

const publishUserIdsByEmail = require('./userMap.json');

function migrateOldProjects() {
  console.info(`Fetching projects ${offset}-${(+offset) + (+limit)}`);

  return legacyProjectClient('projects')
    .join('users', 'users.email', '=', 'projects.email')
    .select(
      'projects.html',
      'projects.email',
      'projects.title',
      'projects.createdAt',
      'projects.updatedAt',
      'users.id',
      'users.username'
    )
    .limit(limit)
    .offset(offset)
    .then(handleOldProjects);
}

function handleOldProjects(oldProjects) {
  console.info(`Migrating ${oldProjects.length} projects into publish database...`);

  return publishClient.transaction((txn) => {
    Promise.all(
      oldProjects.map(migrateProject)
    )
      .then(txn.commit)
      .catch(txn.rollback);
  });
}

function migrateProject(oldProject) {
  return createPublishProject(oldProject)
    .then(createPublishFile);
}

function createPublishProject(oldProject) {
  return publishClient('projects')
    .insert({
      title: oldProject.title,
      _date_created: new Date(oldProject.createdAt).toISOString(),
      _date_updated: new Date(oldProject.updatedAt).toISOString(),
      user_id: publishUserIdsByEmail[oldProject.email]
    })
    .returning('id')
    .then((result) => {
      return {
        publishId: result[0],
        oldProject
      };
    });
}

function createPublishFile(projectData) {
  return publishClient('files')
    .insert({
      path: '/index.html',
      project_id: projectData.publishId,
      buffer: new Buffer(projectData.oldProject.html)
    });
}

console.info('Beginning migration...');

migrateOldProjects()
  .then(() => {
    console.log('All done!');
    process.exit(0);
  })
  .catch((err) => {
    console.log('Error: ', err);
    process.exit(1);
  });

// import-thimble-users.js
// Ensures legacy thimble users exist in the publish database and writes userMap.json
//
// execute as:
// node scripts/import-thimble-users.js <legacyProjectConnString> <publishConnString>

'use strict';

const knex = require('knex');
const Hoek = require('hoek');
const fs = require('fs');

const legacyProjectConnString = process.argv[2];
const publishConnString = process.argv[3];

Hoek.assert(legacyProjectConnString, 'You must provide a connection string to a database containing legacy project data');
Hoek.assert(publishConnString, 'You must provide a connection string to a publish database');

const legacyProjectClient = knex({
  client: 'pg',
  connection: legacyProjectConnString
});

const publishClient = knex({
  client: 'pg',
  connection: publishConnString
});

function checkAndAddUsers() {
  console.info('Fetching users...');

  return legacyProjectClient('users')
    .distinct('username')
    .select('username', 'email')
    .then(mapUsers);
}

function mapUsers(users) {
  return publishClient.transaction((txn) => {
    console.info(`Ensuring all ${users.length} users exist in publish database...`);

    return Promise.all(
      users.map(checkExistence)
    )
      .then(txn.commit)
      .catch(txn.rollback);
  });
}

function checkExistence(user) {
  return publishClient('users')
    .select()
    .where('name', '=', user.username)
    .then(function(result) {
      if (result.length === 0) {
        return insertPublishUser(user);
      }
      publishUserIdsByEmail[user.email] = result[0].id;
    });
}

function insertPublishUser(user) {
  return publishClient('users')
    .insert({
      name: user.username
    })
    .returning('id')
    .then(function(result) {
      publishUserIdsByEmail[user.email] = result[0];
    });
}

function writeUserMap() {
  console.info("writing publishUserIdsByEmail to userMap.json...");

  return new Promise((resolve, reject) => {
    fs.writeFile('userMap.json', JSON.stringify(publishUserIdsByEmail, null, 2), function (err) {
      if (err) {
        return reject(err);
      }
      resolve();
    });
  });
}

const publishUserIdsByEmail = {};

console.info('Beginning migration...');

checkAndAddUsers()
  .then(writeUserMap)
  .then(() => {
    console.log('All done!');
    process.exit(0);
  })
  .catch((err) => {
    console.log('Error: ', err);
    process.exit(1);
  });

Preparing import data

Tools used

  • MySQL Workbench (to export the legacy user and project data as JSON)
  • pg-json-import (pji, to load the JSON exports into Postgres)
  • PostgreSQL (createdb/psql for the staging database)
  • Node.js with knex and hoek (for the import scripts)

Since the projects take up about 750 MiB of disk space, loading them all into a single Node process is not possible, so I broke the projects up into chunks of 50000. You will probably also want to manually increase the heap size available to V8 by running node with --max_old_space_size=2000000.
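
If you want to automate the chunked runs, a minimal Node sketch along these lines can invoke the project import once per chunk, assuming all project chunks have already been loaded into the staging Postgres database. The file name run-chunks.js and the total row count are assumptions for illustration, not part of this gist:

// run-chunks.js (hypothetical helper): runs import-thimble-projects.js once per 50000-project chunk
'use strict';
const { execFileSync } = require('child_process');

const legacyConn = process.argv[2];
const publishConn = process.argv[3];
const chunkSize = 50000;
const totalProjects = 500000; // assumption: replace with the real row count of thimble.ThimbleProjects

for (let offset = 0; offset < totalProjects; offset += chunkSize) {
  // spawn a fresh node process per chunk so heap usage resets between runs
  execFileSync('node', [
    '--max_old_space_size=2000000',
    'import-thimble-projects.js',
    legacyConn,
    publishConn,
    String(chunkSize),
    String(offset)
  ], { stdio: 'inherit' });
}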

Query the user ids, usernames and emails of non-deleted accounts, and save them as JSON using MySQL Workbench:

SELECT 
    id, username, email
FROM
    wmlogin.Users
WHERE
    deletedAt IS NULL;

Query and save the thimble project data as JSON using MySQL Workbench:

SELECT 
    userid AS email,
    rawData AS html,
    title,
    createdAt,
    updatedAt
FROM
    thimble.ThimbleProjects
WHERE
    rawData != ''
LIMIT 50000
OFFSET 0;

The html data might (read: probably will) have control characters embedded in it. These can be removed like so:

$ tr -cd '\11\12\15\40-\176' < thimble_projects.json > thimble_projects_fixed.json
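
If tr isn't available, a minimal Node sketch with the same effect (keep only tab, newline, carriage return and printable ASCII) could be used instead; the file name strip-control-chars.js is an assumption, and it assumes one exported chunk fits in memory:

// strip-control-chars.js (hypothetical helper), mirrors the tr filter above
'use strict';
const fs = require('fs');

const input = fs.readFileSync('thimble_projects.json', 'utf8');
// drop everything outside \t (\x09), \n (\x0A), \r (\x0D) and printable ASCII (\x20-\x7E)
const cleaned = input.replace(/[^\x09\x0A\x0D\x20-\x7E]/g, '');
fs.writeFileSync('thimble_projects_fixed.json', cleaned);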

Create the database and tables you need (see create_tables.sql in this gist):

$ createdb migrate_thimble
$ psql -d migrate_thimble -f create_tables.sql

Import the data into a local Postgres database using the pg-json-import tool:

$ pji --file login_data.json --connection postgres://localhost:5432/migrate_thimble --table users
$ pji --file thimble_projects_fixed.json --connection postgres://localhost:5432/migrate_thimble --table projects

Run import-thimble-users.js:

node --max_old_space_size=2000000 import-thimble-users.js <project_migration_db_connection_string> <publish_db_connection_string>
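
This step writes userMap.json, which import-thimble-projects.js later reads via require('./userMap.json'). Keys are legacy account emails and values are publish database user ids; a hypothetical excerpt (emails and ids invented for illustration):

{
  "someone@example.com": 42,
  "another@example.com": 57
}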

Run import-thimble-projects.js for each chunk, passing the limit and offset:

node --max_old_space_size=2000000 import-thimble-projects.js <project_migration_db_connection_string> <publish_db_connection_string> 50000 0
