Skip to content

Instantly share code, notes, and snippets.

@bradfordcp
Created May 10, 2011 13:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bradfordcp/964489 to your computer and use it in GitHub Desktop.
Save bradfordcp/964489 to your computer and use it in GitHub Desktop.
Converts the json version of the olson timezone database into MySQL tables. Here we have scoped the directories we are searching to certain continents, that logic may be removed in favor of just passing the root directory into process_dir().
#! /usr/bin/env node
// Require the filesystem library
var fs = require('fs');
// Connect to MySQL
var Client = require('mysql').Client, client = new Client();
client.user = 'rails';
client.connect();
client.query('USE scratch');
// Drop / Create Tables
client.query("DROP TABLE timezones");
client.query("DROP TABLE timezone_ranges");
client.query("CREATE TABLE IF NOT EXISTS `timezones` (`id` int(11) NOT NULL AUTO_INCREMENT, `path` varchar(255) NOT NULL, `name` varchar(255) NOT NULL, PRIMARY KEY (`id`)) ENGINE=MyISAM DEFAULT CHARSET=latin1");
client.query("CREATE TABLE IF NOT EXISTS `timezone_ranges` (`id` int(11) NOT NULL AUTO_INCREMENT, `timezone_id` int(11) NOT NULL, `from` bigint(20) NOT NULL, `to` bigint(20) NOT NULL, `dst` tinyint(1) NOT NULL DEFAULT '0', `offset` int(11) NOT NULL, `name` varchar(255) NOT NULL, PRIMARY KEY (`id`)) ENGINE=MyISAM DEFAULT CHARSET=latin1");
// Declare our queues to work with (this avoids having too many files open at once)
var file_queue = [];
var file_queue_started = false;
var timezone_queue = [];
// Declare some counters to keep track of how many items we have processed
var file_count = 0;
var processed_count = 0;
var range_count = 0;
var processed_ranges_count = 0;
/**
* Processes a directory path, recursively calling itself for any directories encountered. If a file is found it gets added to the file_queue
* variable
*
* @param String path
* Path to the directory to process
*/
var process_dir = function (path) {
fs.readdir(path, function (err, contents) {
if (err) throw err;
// Keep an array of all files and directories encountered within this directory
var files = [];
var directories = [];
// Iterate over the directories contents determining if the entry is a file or directory and place it in the correct array
for (var i in contents) {
var item_path = contents[i];
if (item_path.match(/.*\.json$/)) {
// This is a file not a directory
files.push(item_path);
file_count ++;
}
else {
// This is a directory...
directories.push(item_path);
}
}
// Place all files in the "global" file_queue
for (var i in files) {
file_queue.push(path + "/" + files[i]);
}
// Recursively call process_dir on any directories we find
for (var i in directories) {
process_dir(path + "/" + directories[i]);
}
// If we haven't started processing the file_queue kick that off here. This is all async so elements will be added dynamically while processing
if (!file_queue_started) {
file_queue_started = true;
process_queue();
}
});
};
/**
* Processes the queue of files, basically each time it is called the file_queue is popped and process_file() is called with that element.
*/
var process_queue = function () {
var file = file_queue.pop();
process_file(file);
}
/**
* Processes a file by reading in its contents and converting that into a timezone object and adding that object to the queue of timezones. file_queue()
* is called after processing to process the next file in line. If there are no more files to process the timezone queue starts processing.
*
* @param String file
* Path to the file to be processed
*/
var process_file = function (file) {
processed_count++;
fs.readFile(file, function (err, data) {
if (err) throw err;
var obj = JSON.parse(data);
var name = obj._zone.split("/");
name = name[name.length - 1];
name.replace("_", " ");
var tz = {
path: obj._zone,
name: name,
ranges: obj.zone
};
timezone_queue.push(tz);
range_count += tz.ranges.length;
if (file_queue.length) {
process_queue();
}
else {
console.log("Files: " + file_count);
console.log("Procs: " + processed_count)
console.log("Timezones: " + timezone_queue.length);
process_tz_queue();
}
});
}
/**
* Processes the queue of timezones, basically each time it is called the timezone_queue is popped and process_tz() is called with that element.
*/
var process_tz_queue = function () {
var tz = timezone_queue.pop();
process_tz(tz);
}
/**
* Processes the passed in timezone by inserting it into the appropriate MySQL tables. After inserting process_tz_queue is called or if there are no more
* elements to process the MySQL connection is closed.
*
* @param Object tz
* Object representing the timezone data we retrieved earlier
*/
var process_tz = function (tz) {
// Insert the timezone and it's range
client.query("INSERT INTO timezones (id, path, name) VALUES (NULL, ?, ?)", [tz.path, tz.name], function (err, results, fields) {
if (err) throw err;
if (results.insertId != undefined) {
// Build the SQL statement to insert the ranges
var sql = "INSERT INTO timezone_ranges (`id`, `timezone_id`, `from`, `to`, `dst`, `offset`, `name`) VALUES ";
var sql_args = [];
for (var i in tz.ranges) {
var range = tz.ranges[i];
sql += "(NULL, ?, ?, ?, ?, ?, ?), ";
sql_args.push(results.insertId);
sql_args.push(range.from);
sql_args.push(range.to);
sql_args.push(range.dst ? 1 : 0);
sql_args.push(range.offset);
sql_args.push(range.name);
processed_ranges_count ++;
}
sql = sql.slice(0,-2);
client.query(sql, sql_args, function (err, results, fields) {
if (err) throw err;
if (timezone_queue.length) {
process_tz_queue();
}
else {
client.end();
console.log("Ranges: " + range_count)
console.log("Proc'd Ranges: " + processed_ranges_count);
}
});
}
});
}
// Array of areas we want to process
var tz_areas = ['Africa', 'America', 'Antarctica', 'Arctic', 'Asia', 'Atlantic', 'Australia', 'Europe', 'Indian', 'Pacific'];
// Iterate over the timezone areas and call process the directories appropriately. Note if we wanted to process all of them we would call process_dir("/timezones/") instead of dealing with this loop and the array.
for (var i in tz_areas) {
var area = tz_areas[i];
var path = "/timezones/" + area;
process_dir(path);
}
@bradfordcp
Copy link
Author

I had to use queues as I was getting an error reporting too many files were open. This also uses the node mysql client (npm install mysql).

@bradfordcp
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment