This was a fun little script to write.
Note: for this to work, you currently need my forks of:
What it does:
- Opens an XLSX file
- Extracts all XML files into memory
- Parses the sharedStrings.xml file and stores the data in an array.
- Parses the first worksheet and stores the data in a hash, with the cell address as the key.
Less than 100 lines!
use NativeCall;
use LibZip::NativeCall;
use XML::Actions;
# Workbook to read: the first command-line argument when one is given,
# otherwise the placeholder path.
my $filename = @*ARGS[0] // "/path/to/file.xlsx";

# Out-parameter handed to zip_open for its error code.
# NOTE(review): Pointer[int32].new looks like a null pointer, in which case
# libzip has nowhere to store the code and the message below will always
# report UNKNOWN -- confirm against the binding's zip_open signature.
my $err = Pointer[int32].new;
die "Could not open Zip file: { $err ?? $err.deref !! 'UNKNOWN'}"
  unless ( my $z = zip_open($filename, 0, $err) );

# Raw bytes of every ".xml" member, keyed by its path inside the archive.
my %zip-entries;
for ^zip_get_num_entries($z, 0) -> $index {
  my $sb = zip_stat.new;

  # A true (non-zero) result means the entry could not be stat'ed; skip it.
  next if zip_stat_index($z, $index, 0, $sb);
  next unless $sb.name.ends-with('.xml');

  # A failed open yields a false (null) handle; never pass that to zip_fread.
  # NOTE(review): the handle is never closed; if the binding exports
  # zip_fclose, call it once the read below has finished.
  my $zf = zip_fopen_index($z, $index, 0)
    or die "Could not open '{ $sb.name }' inside $filename";

  my $b = Buf.allocate($sb.size);
  my $read = zip_fread($zf, $b, $b.elems);
  die "Could not read '{ $sb.name }' from $filename" if $read < 0;

  # On a short read, drop the zero padding left at the end of the buffer.
  $b.reallocate($read) if $read < $b.elems;

  %zip-entries{$sb.name} = $b;
}
zip_close($z);
# Shared-string table. Element N holds the text of the N-th <si> entry of
# sharedStrings.xml, which is the index that cells of type "s" refer to.
my @strings;

# XML::Actions handler that fills @strings from sharedStrings.xml.
#
# One string is stored per <si> element rather than per <t> element: a
# rich-text entry holds several <r><t>..</t></r> runs, and pushing each run
# separately would shift the index of every string that follows it.
my class SharedStrings is XML::Actions::Work {
  has $!text = '';                  # text collected for the current <si>
  has Bool $!in-t = False;          # currently inside a <t> element
  has Bool $!in-phonetic = False;   # currently inside an <rPh> phonetic run

  # A new string item begins: start with an empty accumulator.
  method si:start (Array $parent-path) {
    $!text = '';
  }

  # The string item is complete: append it to the table.
  method si:end (Array $parent-path) {
    @strings.push: $!text;
  }

  method t:start (Array $parent-path) {
    $!in-t = True;
  }

  method t:end (Array $parent-path) {
    $!in-t = False;
  }

  # Phonetic runs carry reading hints, not part of the displayed string.
  method rPh:start (Array $parent-path) {
    $!in-phonetic = True;
  }

  method rPh:end (Array $parent-path) {
    $!in-phonetic = False;
  }

  # Keep only text that sits inside a <t> and outside any phonetic run.
  method xml:text (Array $parent-path, Str $text) {
    $!text ~= $text if $!in-t && !$!in-phonetic;
  }
}
# Worksheet contents: cell address (the "r" attribute, e.g. "B7") => value.
my %cellData;

# XML::Actions handler that fills %cellData from a worksheet part.
#
# Only text inside <v> (stored value) or <t> (inline string) is collected.
# Collecting every text node under <c> would glue the formula text of an
# <f> element onto the cached value that follows it.
my class XLSX is XML::Actions::Work {
  has $!text = '';              # value text collected for the current cell
  has Bool $!capture = False;   # currently inside a <v> or <t> element

  # A new cell begins: reset the accumulator and the capture state.
  method c:start (Array $parent-path, :$r, :$s, :$t) {
    $!text = '';
    $!capture = False;
  }

  method v:start (Array $parent-path) {
    $!capture = True;
  }

  method v:end (Array $parent-path) {
    $!capture = False;
  }

  # <t> appears inside <is> for cells that store their string inline.
  method t:start (Array $parent-path) {
    $!capture = True;
  }

  method t:end (Array $parent-path) {
    $!capture = False;
  }

  # The cell is complete. For type "s" the collected text is an index into
  # @strings; for every other type it is stored as-is.
  method c:end (Array $parent-path, :$r, :$s, :$t) {
    %cellData{$r} = ($t // '') eq 's'
      ?? @strings[$!text.Int]
      !! $!text;
  }

  method xml:text (Array $parent-path, Str $text) {
    $!text ~= $text if $!capture;
  }
}
# Parse the shared-string table first so that worksheet cells of type "s"
# can be resolved while the sheet is being read. The part is optional (a
# workbook without text cells may omit it), so skip it when it is absent
# instead of failing on .decode of a missing entry.
my $x;
with %zip-entries<xl/sharedStrings.xml> -> $shared-xml {
  $x = XML::Actions.new( xml => $shared-xml.decode );
  $x.process( actions => SharedStrings.new );
}

# Only the worksheet stored as sheet1.xml is read; report its absence
# clearly rather than through a method call on an undefined value.
die "No xl/worksheets/sheet1.xml found in $filename"
  unless %zip-entries<xl/worksheets/sheet1.xml>:exists;

$x = XML::Actions.new( xml => %zip-entries<xl/worksheets/sheet1.xml>.decode );
$x.process( actions => XLSX.new() );
# Splits a cell address such as "AB12" into column letters and row number.
# The column capture is restricted to letters: with \w it could also swallow
# digits, and backtracking would then split "A100" into "A10" and "0".
my regex cell { ^ (<[A..Z a..z]>+) (\d+) $ }

# Print every cell, ordered by column and then by row.
# The block takes a single argument, so sort uses it as a key extractor and
# compares the returned lists element by element:
#   1. number of column letters, so that "AA" comes after "Z";
#   2. the column letters themselves;
#   3. the row, compared as a number.
# Keys that are not well-formed addresses are placed last, ordered by name.
.gist.say for %cellData.pairs.sort(-> $pair {
  my $m = $pair.key ~~ &cell;
  $m
    ?? ( (~$m[0]).chars, (~$m[0]).uc, (~$m[1]).Int )
    !! ( Inf, $pair.key, 0 )
});
This is why I love perl6.