Skip to content

Instantly share code, notes, and snippets.

@olegwtf
Created July 22, 2014 10:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save olegwtf/f7ae6b77a89703e7ab54 to your computer and use it in GitHub Desktop.
Save olegwtf/f7ae6b77a89703e7ab54 to your computer and use it in GitHub Desktop.
use strict;
use Text::CSV;
use Text::Trim;
use HTML::Entities;
# Driver: convert the card-contact XML files found in a directory into CSV
# rows written to STDOUT, one row per contact.

# Everything printed to STDOUT goes through the :utf8 output layer.
binmode STDOUT, ":utf8";

# The input directory is the first (and only) command-line argument.
my $dir = shift
    or die "usage: $0 dir > output";

# Input files are named after the integers -1 through 25:
# "-1.xml", "0.xml", ..., "25.xml".
my @epic = map { "$_.xml" } (-1..25);

# binary => 1 lets fields carry arbitrary (non-ASCII) characters;
# eol => "\n" makes print() terminate every record with a newline.
my $csv = Text::CSV->new({eol => "\n", binary => 1});

for my $f (@epic) {
    my $path = "$dir/$f";

    # Name the file in the error: with 27 inputs a bare $! is not enough
    # to tell which one is missing or unreadable.
    open my $fh, "<:utf8", $path
        or die "cannot open $path: $!";

    # node2rows() returns the rows of one card per call and an empty list
    # once the file is exhausted, which ends this loop.
    while (my @rows = node2rows($fh)) {
        for my $row (@rows) {
            # print() returns false when the row cannot be written;
            # do not silently lose output.
            $csv->print(\*STDOUT, $row)
                or die "cannot write CSV row from $path: " . $csv->error_diag;
        }
    }

    close $fh;
}
# node2rows($fh)
#
# Read the next card from the line-oriented input handle $fh and return its
# contacts as a list of array refs, one per contact line:
#
#     [ $id, $type, $value ]
#
# where $id is the numeric CardCode of the enclosing <CardContacts> element,
# $type is the contact element's tag name and $value is its Value attribute
# with HTML entities decoded.
#
# Expected input, one element per line (surrounding whitespace is ignored):
#
#     <CardContacts CardCode="123">
#     <Type Value="..." IsCommonContact="true" NotPublish="false" ...
#     </CardContacts>
#
# Lines consisting solely of <Cards> or </Cards> are skipped while looking
# for the start of a card.
#
# Returns an empty list when the handle is exhausted before another card
# starts.
#
# Dies when the first non-wrapper line is not a card start, when a contact
# line does not match the expected attributes, when a card has no contact
# lines, or when the input ends inside a card.
sub node2rows {
    my $fh = shift;

    # Locate the opening line of the next card. End of input is detected
    # with defined(), so a final line such as "0" without a trailing newline
    # is still treated as data rather than as end-of-file.
    my $line;
    while (defined($line = <$fh>)) {
        trim($line);    # void context: Text::Trim strips whitespace in place
        last unless $line =~ m!^</?Cards>$!;
    }
    return unless defined $line;

    my ($id) = $line =~ /<CardContacts CardCode="(\d+)">/
        or die "start of the card failed: $line";

    my @rows;
    while (defined($line = <$fh>)) {
        trim($line);

        if ($line eq '</CardContacts>') {
            die "Empty rows set for card $id" unless @rows;
            return @rows;
        }

        # Only contacts flagged IsCommonContact="true" and NotPublish="false"
        # with a non-empty Value are accepted; anything else is an error.
        my ($type, $value) = $line =~ /<(\w+)\s+Value="([^"]+)"\s+IsCommonContact="true"\s+NotPublish="false"/
            or die "bad line in card $id: $line";

        decode_entities($value);    # void context: decodes entities in place
        push @rows, [$id, $type, $value];
    }

    # The read loop ended without seeing </CardContacts>.
    die "unexpected end of file in card $id";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment