Skip to content

Instantly share code, notes, and snippets.

@mathrick
Created November 25, 2012 17:31
Show Gist options
  • Save mathrick/4144465 to your computer and use it in GitHub Desktop.
Save mathrick/4144465 to your computer and use it in GitHub Desktop.
Look ma, Imma perl
#!/usr/bin/perl
use warnings;
use strict;
# Project name and version; both are interpolated into the
# "Project-Id-Version" line of the PO header emitted at the end of the script.
my $PACKAGE = "AdvoWEB";
my $VERSION = "1.0";
use Time::gmtime qw(:FIELDS);
use HTML::Parser;
use Locale::PO;
# Matches one JavaScript-style string literal, double- or single-quoted.
# Inside the quotes, a backslash followed by any character is consumed as a
# pair, so an escaped quote does not terminate the literal.
my $js_str_re = qr{
    " (?: [^\\"] | \\. )* "
    |
    ' (?: [^\\'] | \\. )* '
}x;

# extract_strings($text)
#
# Scans $text for translation calls of the form _("msgid") or
# _("msgid", "second literal") and returns the first literal of each call, in
# order of appearance, with its surrounding quotes removed.
#
# Only the outer quote characters are stripped: backslash escapes inside the
# literal are returned exactly as written, not decoded.  The leading \b means
# that an underscore which is the tail of a longer identifier (e.g. my_("x"))
# is not treated as a call.
#
# Returns a list of strings (its length when called in scalar context).
sub extract_strings {
    my ($text) = @_;

    # In list context a //g match with a single capture group yields that
    # group's text for every successive match.
    my @quoted = $text =~ m{
        \b _ \s* \(
        \s*
        ($js_str_re)
        \s*
        (?: , \s* $js_str_re \s* )?
        \)
    }xg;

    # Drop the first and last character of each literal, i.e. its quotes.
    my @msgids = map { substr $_, 1, -1 } @quoted;
    return @msgids;
}
# Exactly one argument is accepted: the HTML file to scan.
die "Usage: $0 HTMLFILE\n" unless @ARGV == 1;
my $infile = shift @ARGV;
open(my $fh, '<:encoding(UTF-8)', $infile)
    or die "$0: $infile: $!\n";

# msgid => array ref of the line numbers at which that msgid was found.
my %stringset;

# Start-tag handler: receives the tag's line number followed by the
# attribute name/value pairs, and scans every attribute value for _()
# calls.
my $on_start_tag = sub {
    my ($line, @attr) = @_;
    while (@attr) {
        my ($name, $value) = splice @attr, 0, 2;
        for my $msgid (extract_strings($value)) {
            push @{ $stringset{$msgid} }, $line;
        }
    }
};

# Text handler: receives the line number and the decoded text, and scans
# the inside of every {{ ... }} span for _() calls.  The span pattern uses
# "." without /s, so a span containing a newline is not matched.
my $on_text = sub {
    my ($line, $text) = @_;
    while ($text =~ /\{\{(.*?)\}\}/g) {
        my $expression = $1;
        for my $msgid (extract_strings($expression)) {
            push @{ $stringset{$msgid} }, $line;
        }
    }
};

my $parser = HTML::Parser->new(
    api_version => 3,
    start_h     => [ $on_start_tag, 'line, @attr' ],
    text_h      => [ $on_text,      'line, dtext' ],
);
$parser->empty_element_tags(1);
$parser->unbroken_text(1);
$parser->parse_file($fh);
# Take a single UTC snapshot for the header timestamp.  Time::gmtime's
# gmtime() returns an object with by-name accessors; those are used here
# instead of the imported $tm_* globals so the values are visibly tied to
# this one call.
my $now = gmtime();

# year counts from 1900 and mon counts from 0 (January == 0), so both need
# an offset before they can be printed as a calendar date.
my $revision_date = sprintf(
    "PO-Revision-Date: %d-%02d-%02d %02d:%02d GMT\\n",
    $now->year + 1900,
    $now->mon + 1,
    $now->mday,
    $now->hour,
    $now->min,
);

my @out;

# Header entry: an empty msgid whose msgstr carries the PO metadata.  Each
# header line ends in a literal backslash-n, exactly as before.
# NOTE(review): this assumes the installed Locale::PO writes msgstr without
# re-escaping backslashes -- confirm against the version in use.
push @out, Locale::PO->new(
    -msgid  => '',
    -msgstr =>
        "Project-Id-Version: $PACKAGE $VERSION\\n" .
        $revision_date .
        "MIME-Version: 1.0\\n" .
        "Content-Type: text/plain; charset=UTF-8\\n",
);

# One untranslated entry per extracted string, referencing every line at
# which it occurred.  Keys are sorted so that repeated runs over the same
# input produce identical output (plain hash iteration order is arbitrary).
for my $msgid (sort keys %stringset) {
    my $lines = $stringset{$msgid};
    push @out, Locale::PO->new(
        -msgid     => $msgid,
        -reference => join(" ", map { "$infile:$_" } @{$lines}),
        -msgstr    => "",
    );
}

# Locale::PO's own file-writing interface takes a filename rather than a
# handle, so each entry is dumped to standard output directly.
for my $entry (@out) {
    print $entry->dump;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment