Skip to content

Instantly share code, notes, and snippets.

@zaach
Created February 4, 2012 19:59
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save zaach/1739769 to your computer and use it in GitHub Desktop.
Save zaach/1739769 to your computer and use it in GitHub Desktop.
PO parser from http://jsgettext.berlios.de/lib/Gettext.js adapted for Node.js and modified to be more like po2json.pl
#!/usr/bin/env node
/*
PO parser from http://jsgettext.berlios.de/lib/Gettext.js
adapted for Node.js and modified to be more like po2json.pl
- Zach Carter <zcarter@cse.usf.edu>
*/
/*
Pure Javascript implementation of Uniforum message translation.
Copyright (C) 2008 Joshua I. Miller <unrtst@cpan.org>, all rights reserved
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Library General Public License as published
by the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
var fs = require('fs');
var path = require('path');
// Raw command-line vector; the code below treats argv[1] as the script path
// and argv[2] onwards as the user-supplied arguments.
var argv = process.argv;
// Pretty-print flag. indexOf returns -1 when '-p' is absent and ~(-1) === 0,
// so this is 0 (falsy) without the flag and a non-zero number (truthy) with it.
var pretty = ~argv.indexOf('-p');
/*
Read a PO file, convert it with parse_po and print the resulting JSON on
stdout.

file - path to the .po file. It is resolved with fs.realpathSync, so a
       missing file throws synchronously; read errors are rethrown from the
       readFile callback.

With the -p flag the translations are wrapped in an object keyed by the file's
base name (without the ".po" extension) and pretty-printed; without it the
bare translation table is printed on a single line.
*/
function parse (file) {
    fs.readFile(fs.realpathSync(file), 'utf8', function (err, data) {
        if (err) throw err;
        var translations = parse_po(data);
        if (pretty) {
            var result = {};
            result[path.basename(file, '.po')] = translations;
            // perl JSON encoder uses three spaces (╯°□°)╯︵ ┻━┻
            // so the indent string must be exactly three spaces wide.
            console.log(JSON.stringify(result, null, '   '));
        } else {
            console.log(JSON.stringify(translations));
        }
    });
}
// Separator inserted between msgctxt and msgid when building a lookup key
// (the EOT character, code point 4). Written as a \u escape because the legacy
// octal form "\004" is a SyntaxError in strict mode and in ES modules.
var context_glue = "\u0004";

/*
Parse the text of a PO file into a plain translation table.

data - full contents of the PO file as a string.

Returns an object where
  - each key is the msgid, or msgctxt + context_glue + msgid when the entry
    has a non-empty context;
  - each value is an array [msgid_plural or null, msgstr[0], msgstr[1], ...];
  - the "" key holds the header as an object of "Name" -> "value" pairs
    (values keep their leading space, like the perl script), or {} when the
    file has no header.

Escape sequences other than \" are left untouched in the strings; the header
parser relies on that to split on the literal two-character sequence \n.

Parse problems are collected and written with console.warn; they never throw.
*/
var parse_po = function(data) {
    var rv = {};
    var buffer = {};
    var lastbuffer = "";
    var errors = [];
    var has_own = Object.prototype.hasOwnProperty;

    // Store the buffered entry in rv and reset the buffer. Does nothing (and
    // keeps the buffer) while no msgid has been seen yet.
    function flush_entry() {
        if (typeof(buffer['msgid']) == 'undefined') return;

        var has_ctxt = typeof(buffer['msgctxt']) != 'undefined' &&
                       buffer['msgctxt'].length;
        var msg_ctxt_id = has_ctxt ?
                          buffer['msgctxt']+context_glue+buffer['msgid'] :
                          buffer['msgid'];

        var has_plural = typeof(buffer['msgid_plural']) != 'undefined' &&
                         buffer['msgid_plural'].length;
        var msgid_plural = has_plural ? buffer['msgid_plural'] : null;

        // find msgstr_* translations and put them at their plural index
        var trans = [];
        for (var name in buffer) {
            var idx = name.match(/^msgstr_(\d+)/);
            if (idx) trans[parseInt(idx[1], 10)] = buffer[name];
        }
        trans.unshift(msgid_plural);

        // only add it if we've got a translation
        // NOTE: this doesn't conform to msgfmt specs
        if (trans.length > 1) rv[msg_ctxt_id] = trans;

        buffer = {};
        lastbuffer = "";
    }

    var lines = data.split("\n");
    for (var i=0; i<lines.length; i++) {
        // chomp
        var line = lines[i].replace(/(\n|\r)+$/, '');
        var match;

        // Blank line / end of an entry. Whitespace-only lines count as blank;
        // otherwise the entry before them would be overwritten by the next
        // msgid without ever being stored.
        if (/^\s*$/.test(line)) {
            flush_entry();

        // comments
        } else if (/^(#[^~]|#$)/.test(line)) {
            continue;

        // msgctxt, msgid, msgid_plural (optionally inside an obsolete "#~ " entry)
        } else if (match = line.match(/^(?:#~ )?(msgctxt|msgid|msgid_plural)\s+(.*)/)) {
            lastbuffer = match[1];
            buffer[lastbuffer] = parse_po_dequote(match[2]);

        // msgstr and msgstr[n]; a bare msgstr is treated like msgstr[0]
        } else if (match = line.match(/^(?:#~ )?msgstr(?:\[(\d+)\])?\s+(.*)/)) {
            lastbuffer = 'msgstr_'+(match[1] || '0');
            buffer[lastbuffer] = parse_po_dequote(match[2]);

        // continued string
        } else if (/^(?:#~ )?"/.test(line)) {
            if (lastbuffer) {
                buffer[lastbuffer] += parse_po_dequote(line);
            } else {
                // nothing to continue: appending would create a bogus
                // "undefined..." value under the "" buffer key
                errors.push("Strange line ["+i+"] : "+line);
            }

        // something strange
        } else {
            errors.push("Strange line ["+i+"] : "+line);
        }
    }

    // handle the final entry (file not terminated by a blank line)
    flush_entry();

    // parse out the header
    if (rv[""] && rv[""][1]) {
        var cur = {};
        // header fields are separated by the literal characters backslash + n
        var hlines = rv[""][1].split(/\\n/);
        for (var j=0; j<hlines.length; j++) {
            var hline = hlines[j];
            if (! hline.length) continue;

            var pos = hline.indexOf(':', 0);
            if (pos != -1) {
                var key = hline.substring(0, pos);
                var val = hline.substring(pos +1);
                // own-property check so that keys such as "constructor" are
                // not mistaken for duplicates via Object.prototype
                if (has_own.call(cur, key) && cur[key].length) {
                    errors.push("SKIPPING DUPLICATE HEADER LINE: "+hline);
                } else if (/#-#-#-#-#/.test(key)) {
                    errors.push("SKIPPING ERROR MARKER IN HEADER: "+hline);
                } else {
                    // leading spaces are kept on purpose (the perl script keeps them)
                    cur[key] = val;
                }
            } else {
                errors.push("PROBLEM LINE IN HEADER: "+hline);
                cur[hline] = '';
            }
        }

        // replace header string with assoc array
        rv[""] = cur;
    } else {
        rv[""] = {};
    }

    // GNU Gettext silently ignores parse errors; we only report them on stderr.
    if (errors.length) console.warn(errors.join("\n"));

    return rv;
};
/*
Strip the surrounding double quotes from a PO string token and unescape
embedded \" sequences.

str - the text following a keyword, or a whole continuation line; an optional
      leading "#~ " obsolete-entry marker is accepted in front of the quote.

Returns the text between the first and the last double quote (anything after
the last quote is dropped), or the input itself when it is not quoted, with
every \" turned into ". All other escape sequences are left as they are.
*/
var parse_po_dequote = function(str) {
    var quoted = str.match(/^(?:#~ )?"(.*)"/);
    var inner = quoted ? quoted[1] : str;
    return inner.replace(/\\"/g, '"');
};
// Command-line entry point.
// The input file is the first argument that is not one of the -p / -h flags,
// so the flag may come before or after the file name. When no file is given
// (including a lone "-p", which used to pass undefined on to parse) or when
// -h is present, the usage text is printed instead.
var po_files = argv.slice(2).filter(function (arg) {
    return arg !== '-p' && arg !== '-h';
});
if (argv.indexOf('-h') >= 0 || po_files.length === 0) {
    console.log(path.basename(argv[1])+" {-p} {file.po} > {outputfile.json}\n -p : do pretty-printing of json data\n");
} else {
    parse(po_files[0]);
}
@danroberts
Copy link

This was extremely helpful. Thanks... I forked it and added some code to let you use it in a node runtime environment. It's a bit rough... doesn't use pretty option, but here it is: https://gist.github.com/2846424

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment